linux/arch/x86/kernel/amd_iommu.c
   1/*
   2 * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
   3 * Author: Joerg Roedel <joerg.roedel@amd.com>
   4 *         Leo Duran <leo.duran@amd.com>
   5 *
   6 * This program is free software; you can redistribute it and/or modify it
   7 * under the terms of the GNU General Public License version 2 as published
   8 * by the Free Software Foundation.
   9 *
  10 * This program is distributed in the hope that it will be useful,
  11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 * GNU General Public License for more details.
  14 *
  15 * You should have received a copy of the GNU General Public License
  16 * along with this program; if not, write to the Free Software
  17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  18 */
  19
  20#include <linux/pci.h>
  21#include <linux/gfp.h>
  22#include <linux/bitops.h>
  23#include <linux/debugfs.h>
  24#include <linux/scatterlist.h>
  25#include <linux/dma-mapping.h>
  26#include <linux/iommu-helper.h>
  27#include <linux/iommu.h>
  28#include <asm/proto.h>
  29#include <asm/iommu.h>
  30#include <asm/gart.h>
  31#include <asm/amd_iommu_types.h>
  32#include <asm/amd_iommu.h>
  33
  34#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
  35
  36#define EXIT_LOOP_COUNT 10000000
  37
  38static DEFINE_RWLOCK(amd_iommu_devtable_lock);
  39
  40/* A list of preallocated protection domains */
  41static LIST_HEAD(iommu_pd_list);
  42static DEFINE_SPINLOCK(iommu_pd_list_lock);
  43
  44/*
  45 * Domain for untranslated devices - only allocated
  46 * if iommu=pt passed on kernel cmd line.
  47 */
  48static struct protection_domain *pt_domain;
  49
  50static struct iommu_ops amd_iommu_ops;
  51
  52/*
   53 * general struct to manage commands sent to an IOMMU
  54 */
  55struct iommu_cmd {
  56        u32 data[4];
  57};
  58
  59static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
  60                             struct unity_map_entry *e);
  61static struct dma_ops_domain *find_protection_domain(u16 devid);
  62static u64 *alloc_pte(struct protection_domain *domain,
  63                      unsigned long address, int end_lvl,
  64                      u64 **pte_page, gfp_t gfp);
  65static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
  66                                      unsigned long start_page,
  67                                      unsigned int pages);
  68static void reset_iommu_command_buffer(struct amd_iommu *iommu);
  69static u64 *fetch_pte(struct protection_domain *domain,
  70                      unsigned long address, int map_size);
  71static void update_domain(struct protection_domain *domain);
  72
  73#ifdef CONFIG_AMD_IOMMU_STATS
  74
  75/*
  76 * Initialization code for statistics collection
  77 */
  78
  79DECLARE_STATS_COUNTER(compl_wait);
  80DECLARE_STATS_COUNTER(cnt_map_single);
  81DECLARE_STATS_COUNTER(cnt_unmap_single);
  82DECLARE_STATS_COUNTER(cnt_map_sg);
  83DECLARE_STATS_COUNTER(cnt_unmap_sg);
  84DECLARE_STATS_COUNTER(cnt_alloc_coherent);
  85DECLARE_STATS_COUNTER(cnt_free_coherent);
  86DECLARE_STATS_COUNTER(cross_page);
  87DECLARE_STATS_COUNTER(domain_flush_single);
  88DECLARE_STATS_COUNTER(domain_flush_all);
  89DECLARE_STATS_COUNTER(alloced_io_mem);
  90DECLARE_STATS_COUNTER(total_map_requests);
  91
  92static struct dentry *stats_dir;
  93static struct dentry *de_isolate;
  94static struct dentry *de_fflush;
  95
  96static void amd_iommu_stats_add(struct __iommu_counter *cnt)
  97{
  98        if (stats_dir == NULL)
  99                return;
 100
 101        cnt->dent = debugfs_create_u64(cnt->name, 0444, stats_dir,
 102                                       &cnt->value);
 103}
 104
 105static void amd_iommu_stats_init(void)
 106{
 107        stats_dir = debugfs_create_dir("amd-iommu", NULL);
 108        if (stats_dir == NULL)
 109                return;
 110
 111        de_isolate = debugfs_create_bool("isolation", 0444, stats_dir,
 112                                         (u32 *)&amd_iommu_isolate);
 113
 114        de_fflush  = debugfs_create_bool("fullflush", 0444, stats_dir,
 115                                         (u32 *)&amd_iommu_unmap_flush);
 116
 117        amd_iommu_stats_add(&compl_wait);
 118        amd_iommu_stats_add(&cnt_map_single);
 119        amd_iommu_stats_add(&cnt_unmap_single);
 120        amd_iommu_stats_add(&cnt_map_sg);
 121        amd_iommu_stats_add(&cnt_unmap_sg);
 122        amd_iommu_stats_add(&cnt_alloc_coherent);
 123        amd_iommu_stats_add(&cnt_free_coherent);
 124        amd_iommu_stats_add(&cross_page);
 125        amd_iommu_stats_add(&domain_flush_single);
 126        amd_iommu_stats_add(&domain_flush_all);
 127        amd_iommu_stats_add(&alloced_io_mem);
 128        amd_iommu_stats_add(&total_map_requests);
 129}
 130
 131#endif
 132
 133/* returns !0 if the IOMMU is caching non-present entries in its TLB */
 134static int iommu_has_npcache(struct amd_iommu *iommu)
 135{
 136        return iommu->cap & (1UL << IOMMU_CAP_NPCACHE);
 137}
 138
 139/****************************************************************************
 140 *
 141 * Interrupt handling functions
 142 *
 143 ****************************************************************************/
 144
 145static void dump_dte_entry(u16 devid)
 146{
 147        int i;
 148
 149        for (i = 0; i < 8; ++i)
 150                pr_err("AMD-Vi: DTE[%d]: %08x\n", i,
 151                        amd_iommu_dev_table[devid].data[i]);
 152}
 153
 154static void dump_command(unsigned long phys_addr)
 155{
 156        struct iommu_cmd *cmd = phys_to_virt(phys_addr);
 157        int i;
 158
 159        for (i = 0; i < 4; ++i)
 160                pr_err("AMD-Vi: CMD[%d]: %08x\n", i, cmd->data[i]);
 161}
 162
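     /*
      * Decode a single event log entry and print it. For some event types
      * additional state is dumped as well (the DTE contents or the offending
      * command).
      */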
 163static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
 164{
 165        u32 *event = __evt;
 166        int type  = (event[1] >> EVENT_TYPE_SHIFT)  & EVENT_TYPE_MASK;
 167        int devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK;
 168        int domid = (event[1] >> EVENT_DOMID_SHIFT) & EVENT_DOMID_MASK;
 169        int flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
 170        u64 address = (u64)(((u64)event[3]) << 32) | event[2];
 171
 172        printk(KERN_ERR "AMD-Vi: Event logged [");
 173
 174        switch (type) {
 175        case EVENT_TYPE_ILL_DEV:
 176                printk("ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x "
 177                       "address=0x%016llx flags=0x%04x]\n",
 178                       PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
 179                       address, flags);
 180                dump_dte_entry(devid);
 181                break;
 182        case EVENT_TYPE_IO_FAULT:
 183                printk("IO_PAGE_FAULT device=%02x:%02x.%x "
 184                       "domain=0x%04x address=0x%016llx flags=0x%04x]\n",
 185                       PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
 186                       domid, address, flags);
 187                break;
 188        case EVENT_TYPE_DEV_TAB_ERR:
 189                printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
 190                       "address=0x%016llx flags=0x%04x]\n",
 191                       PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
 192                       address, flags);
 193                break;
 194        case EVENT_TYPE_PAGE_TAB_ERR:
 195                printk("PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
 196                       "domain=0x%04x address=0x%016llx flags=0x%04x]\n",
 197                       PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
 198                       domid, address, flags);
 199                break;
 200        case EVENT_TYPE_ILL_CMD:
 201                printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address);
 202                reset_iommu_command_buffer(iommu);
 203                dump_command(address);
 204                break;
 205        case EVENT_TYPE_CMD_HARD_ERR:
 206                printk("COMMAND_HARDWARE_ERROR address=0x%016llx "
 207                       "flags=0x%04x]\n", address, flags);
 208                break;
 209        case EVENT_TYPE_IOTLB_INV_TO:
 210                printk("IOTLB_INV_TIMEOUT device=%02x:%02x.%x "
 211                       "address=0x%016llx]\n",
 212                       PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
 213                       address);
 214                break;
 215        case EVENT_TYPE_INV_DEV_REQ:
 216                printk("INVALID_DEVICE_REQUEST device=%02x:%02x.%x "
 217                       "address=0x%016llx flags=0x%04x]\n",
 218                       PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
 219                       address, flags);
 220                break;
 221        default:
 222                printk(KERN_ERR "UNKNOWN type=0x%02x]\n", type);
 223        }
 224}
 225
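     /*
      * Print all event log entries between the head and tail pointers and
      * write the new head pointer back to the hardware.
      */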
 226static void iommu_poll_events(struct amd_iommu *iommu)
 227{
 228        u32 head, tail;
 229        unsigned long flags;
 230
 231        spin_lock_irqsave(&iommu->lock, flags);
 232
 233        head = readl(iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
 234        tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
 235
 236        while (head != tail) {
 237                iommu_print_event(iommu, iommu->evt_buf + head);
 238                head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size;
 239        }
 240
 241        writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
 242
 243        spin_unlock_irqrestore(&iommu->lock, flags);
 244}
 245
 246irqreturn_t amd_iommu_int_handler(int irq, void *data)
 247{
 248        struct amd_iommu *iommu;
 249
 250        for_each_iommu(iommu)
 251                iommu_poll_events(iommu);
 252
 253        return IRQ_HANDLED;
 254}
 255
 256/****************************************************************************
 257 *
 258 * IOMMU command queuing functions
 259 *
 260 ****************************************************************************/
 261
 262/*
  263 * Writes the command to the IOMMU's command buffer and informs the
 264 * hardware about the new command. Must be called with iommu->lock held.
 265 */
 266static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
 267{
 268        u32 tail, head;
 269        u8 *target;
 270
 271        tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
 272        target = iommu->cmd_buf + tail;
 273        memcpy_toio(target, cmd, sizeof(*cmd));
 274        tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
 275        head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
 276        if (tail == head)
 277                return -ENOMEM;
 278        writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
 279
 280        return 0;
 281}
 282
 283/*
 284 * General queuing function for commands. Takes iommu->lock and calls
 285 * __iommu_queue_command().
 286 */
 287static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
 288{
 289        unsigned long flags;
 290        int ret;
 291
 292        spin_lock_irqsave(&iommu->lock, flags);
 293        ret = __iommu_queue_command(iommu, cmd);
 294        if (!ret)
 295                iommu->need_sync = true;
 296        spin_unlock_irqrestore(&iommu->lock, flags);
 297
 298        return ret;
 299}
 300
 301/*
 302 * This function waits until an IOMMU has completed a completion
 303 * wait command
 304 */
 305static void __iommu_wait_for_completion(struct amd_iommu *iommu)
 306{
 307        int ready = 0;
 308        unsigned status = 0;
 309        unsigned long i = 0;
 310
 311        INC_STATS_COUNTER(compl_wait);
 312
 313        while (!ready && (i < EXIT_LOOP_COUNT)) {
 314                ++i;
 315                /* wait for the bit to become one */
 316                status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
 317                ready = status & MMIO_STATUS_COM_WAIT_INT_MASK;
 318        }
 319
 320        /* set bit back to zero */
 321        status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
 322        writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
 323
 324        if (unlikely(i == EXIT_LOOP_COUNT)) {
 325                spin_unlock(&iommu->lock);
 326                reset_iommu_command_buffer(iommu);
 327                spin_lock(&iommu->lock);
 328        }
 329}
 330
 331/*
 332 * This function queues a completion wait command into the command
 333 * buffer of an IOMMU
 334 */
 335static int __iommu_completion_wait(struct amd_iommu *iommu)
 336{
 337        struct iommu_cmd cmd;
 338
  339        memset(&cmd, 0, sizeof(cmd));
  340        cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
  341        CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
  342
  343        return __iommu_queue_command(iommu, &cmd);
 344}
 345
 346/*
 347 * This function is called whenever we need to ensure that the IOMMU has
 348 * completed execution of all commands we sent. It sends a
 349 * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs
 350 * us about that by writing a value to a physical address we pass with
 351 * the command.
 352 */
 353static int iommu_completion_wait(struct amd_iommu *iommu)
 354{
 355        int ret = 0;
 356        unsigned long flags;
 357
 358        spin_lock_irqsave(&iommu->lock, flags);
 359
 360        if (!iommu->need_sync)
 361                goto out;
 362
 363        ret = __iommu_completion_wait(iommu);
 364
 365        iommu->need_sync = false;
 366
 367        if (ret)
 368                goto out;
 369
 370        __iommu_wait_for_completion(iommu);
 371
 372out:
 373        spin_unlock_irqrestore(&iommu->lock, flags);
 374
 375        return 0;
 376}
 377
 378/*
 379 * Command send function for invalidating a device table entry
 380 */
 381static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
 382{
 383        struct iommu_cmd cmd;
 384        int ret;
 385
 386        BUG_ON(iommu == NULL);
 387
 388        memset(&cmd, 0, sizeof(cmd));
 389        CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY);
 390        cmd.data[0] = devid;
 391
 392        ret = iommu_queue_command(iommu, &cmd);
 393
 394        return ret;
 395}
 396
 397static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
 398                                          u16 domid, int pde, int s)
 399{
 400        memset(cmd, 0, sizeof(*cmd));
 401        address &= PAGE_MASK;
 402        CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
 403        cmd->data[1] |= domid;
 404        cmd->data[2] = lower_32_bits(address);
 405        cmd->data[3] = upper_32_bits(address);
 406        if (s) /* size bit - we flush more than one 4kb page */
 407                cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
  408        if (pde) /* PDE bit - we want to flush everything, not only the PTEs */
 409                cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
 410}
 411
 412/*
  413 * Generic command send function for invalidating TLB entries
 414 */
 415static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
 416                u64 address, u16 domid, int pde, int s)
 417{
 418        struct iommu_cmd cmd;
 419        int ret;
 420
 421        __iommu_build_inv_iommu_pages(&cmd, address, domid, pde, s);
 422
 423        ret = iommu_queue_command(iommu, &cmd);
 424
 425        return ret;
 426}
 427
 428/*
 429 * TLB invalidation function which is called from the mapping functions.
 430 * It invalidates a single PTE if the range to flush is within a single
  431 * page. Otherwise it flushes all TLB entries of the domain on this IOMMU.
 432 */
 433static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid,
 434                u64 address, size_t size)
 435{
 436        int s = 0;
 437        unsigned pages = iommu_num_pages(address, size, PAGE_SIZE);
 438
 439        address &= PAGE_MASK;
 440
 441        if (pages > 1) {
 442                /*
 443                 * If we have to flush more than one page, flush all
 444                 * TLB entries for this domain
 445                 */
 446                address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
 447                s = 1;
 448        }
 449
 450        iommu_queue_inv_iommu_pages(iommu, address, domid, 0, s);
 451
 452        return 0;
 453}
 454
 455/* Flush the whole IO/TLB for a given protection domain */
 456static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid)
 457{
 458        u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
 459
 460        INC_STATS_COUNTER(domain_flush_single);
 461
 462        iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1);
 463}
 464
 465/* Flush the whole IO/TLB for a given protection domain - including PDE */
 466static void iommu_flush_tlb_pde(struct amd_iommu *iommu, u16 domid)
 467{
  468        u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
  469
  470        INC_STATS_COUNTER(domain_flush_single);
  471
  472        iommu_queue_inv_iommu_pages(iommu, address, domid, 1, 1);
 473}
 474
 475/*
 476 * This function flushes one domain on one IOMMU
 477 */
 478static void flush_domain_on_iommu(struct amd_iommu *iommu, u16 domid)
 479{
 480        struct iommu_cmd cmd;
 481        unsigned long flags;
 482
 483        __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
 484                                      domid, 1, 1);
 485
 486        spin_lock_irqsave(&iommu->lock, flags);
 487        __iommu_queue_command(iommu, &cmd);
 488        __iommu_completion_wait(iommu);
 489        __iommu_wait_for_completion(iommu);
 490        spin_unlock_irqrestore(&iommu->lock, flags);
 491}
 492
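     /* Flush the IO/TLB of every allocated protection domain on one IOMMU */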
 493static void flush_all_domains_on_iommu(struct amd_iommu *iommu)
 494{
 495        int i;
 496
 497        for (i = 1; i < MAX_DOMAIN_ID; ++i) {
 498                if (!test_bit(i, amd_iommu_pd_alloc_bitmap))
 499                        continue;
 500                flush_domain_on_iommu(iommu, i);
 501        }
 502
 503}
 504
 505/*
 506 * This function is used to flush the IO/TLB for a given protection domain
 507 * on every IOMMU in the system
 508 */
 509static void iommu_flush_domain(u16 domid)
 510{
 511        struct amd_iommu *iommu;
 512
 513        INC_STATS_COUNTER(domain_flush_all);
 514
 515        for_each_iommu(iommu)
 516                flush_domain_on_iommu(iommu, domid);
 517}
 518
 519void amd_iommu_flush_all_domains(void)
 520{
 521        struct amd_iommu *iommu;
 522
 523        for_each_iommu(iommu)
 524                flush_all_domains_on_iommu(iommu);
 525}
 526
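     /* Invalidate the device table entries of all devices handled by this IOMMU */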
 527static void flush_all_devices_for_iommu(struct amd_iommu *iommu)
 528{
 529        int i;
 530
 531        for (i = 0; i <= amd_iommu_last_bdf; ++i) {
 532                if (iommu != amd_iommu_rlookup_table[i])
 533                        continue;
 534
 535                iommu_queue_inv_dev_entry(iommu, i);
 536                iommu_completion_wait(iommu);
 537        }
 538}
 539
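     /* Invalidate the device table entries of the devices attached to a domain */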
 540static void flush_devices_by_domain(struct protection_domain *domain)
 541{
 542        struct amd_iommu *iommu;
 543        int i;
 544
 545        for (i = 0; i <= amd_iommu_last_bdf; ++i) {
 546                if ((domain == NULL && amd_iommu_pd_table[i] == NULL) ||
 547                    (amd_iommu_pd_table[i] != domain))
 548                        continue;
 549
 550                iommu = amd_iommu_rlookup_table[i];
 551                if (!iommu)
 552                        continue;
 553
 554                iommu_queue_inv_dev_entry(iommu, i);
 555                iommu_completion_wait(iommu);
 556        }
 557}
 558
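     /*
      * Called after an illegal command or a completion-wait timeout. Resets
      * the command buffer and re-flushes all device table entries and
      * domains of this IOMMU.
      */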
 559static void reset_iommu_command_buffer(struct amd_iommu *iommu)
 560{
 561        pr_err("AMD-Vi: Resetting IOMMU command buffer\n");
 562
 563        if (iommu->reset_in_progress)
 564                panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n");
 565
 566        iommu->reset_in_progress = true;
 567
 568        amd_iommu_reset_cmd_buffer(iommu);
 569        flush_all_devices_for_iommu(iommu);
 570        flush_all_domains_on_iommu(iommu);
 571
 572        iommu->reset_in_progress = false;
 573}
 574
 575void amd_iommu_flush_all_devices(void)
 576{
 577        flush_devices_by_domain(NULL);
 578}
 579
 580/****************************************************************************
 581 *
  582 * The functions below are used to create the page table mappings for
 583 * unity mapped regions.
 584 *
 585 ****************************************************************************/
 586
 587/*
  588 * Generic mapping function. It maps a physical address into a DMA
 589 * address space. It allocates the page table pages if necessary.
 590 * In the future it can be extended to a generic mapping function
 591 * supporting all features of AMD IOMMU page tables like level skipping
 592 * and full 64 bit address spaces.
 593 */
 594static int iommu_map_page(struct protection_domain *dom,
 595                          unsigned long bus_addr,
 596                          unsigned long phys_addr,
 597                          int prot,
 598                          int map_size)
 599{
 600        u64 __pte, *pte;
 601
 602        bus_addr  = PAGE_ALIGN(bus_addr);
 603        phys_addr = PAGE_ALIGN(phys_addr);
 604
 605        BUG_ON(!PM_ALIGNED(map_size, bus_addr));
 606        BUG_ON(!PM_ALIGNED(map_size, phys_addr));
 607
 608        if (!(prot & IOMMU_PROT_MASK))
 609                return -EINVAL;
 610
 611        pte = alloc_pte(dom, bus_addr, map_size, NULL, GFP_KERNEL);
 612
 613        if (IOMMU_PTE_PRESENT(*pte))
 614                return -EBUSY;
 615
 616        __pte = phys_addr | IOMMU_PTE_P;
 617        if (prot & IOMMU_PROT_IR)
 618                __pte |= IOMMU_PTE_IR;
 619        if (prot & IOMMU_PROT_IW)
 620                __pte |= IOMMU_PTE_IW;
 621
 622        *pte = __pte;
 623
 624        update_domain(dom);
 625
 626        return 0;
 627}
 628
 629static void iommu_unmap_page(struct protection_domain *dom,
 630                             unsigned long bus_addr, int map_size)
 631{
 632        u64 *pte = fetch_pte(dom, bus_addr, map_size);
 633
 634        if (pte)
 635                *pte = 0;
 636}
 637
 638/*
 639 * This function checks if a specific unity mapping entry is needed for
 640 * this specific IOMMU.
 641 */
 642static int iommu_for_unity_map(struct amd_iommu *iommu,
 643                               struct unity_map_entry *entry)
 644{
 645        u16 bdf, i;
 646
 647        for (i = entry->devid_start; i <= entry->devid_end; ++i) {
 648                bdf = amd_iommu_alias_table[i];
 649                if (amd_iommu_rlookup_table[bdf] == iommu)
 650                        return 1;
 651        }
 652
 653        return 0;
 654}
 655
 656/*
 657 * Init the unity mappings for a specific IOMMU in the system
 658 *
 659 * Basically iterates over all unity mapping entries and applies them to
  660 * the default dma_ops domain of that IOMMU if necessary.
 661 */
 662static int iommu_init_unity_mappings(struct amd_iommu *iommu)
 663{
 664        struct unity_map_entry *entry;
 665        int ret;
 666
 667        list_for_each_entry(entry, &amd_iommu_unity_map, list) {
 668                if (!iommu_for_unity_map(iommu, entry))
 669                        continue;
 670                ret = dma_ops_unity_map(iommu->default_dom, entry);
 671                if (ret)
 672                        return ret;
 673        }
 674
 675        return 0;
 676}
 677
 678/*
 679 * This function actually applies the mapping to the page table of the
 680 * dma_ops domain.
 681 */
 682static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
 683                             struct unity_map_entry *e)
 684{
 685        u64 addr;
 686        int ret;
 687
 688        for (addr = e->address_start; addr < e->address_end;
 689             addr += PAGE_SIZE) {
 690                ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot,
 691                                     PM_MAP_4k);
 692                if (ret)
 693                        return ret;
 694                /*
 695                 * if unity mapping is in aperture range mark the page
 696                 * as allocated in the aperture
 697                 */
 698                if (addr < dma_dom->aperture_size)
 699                        __set_bit(addr >> PAGE_SHIFT,
 700                                  dma_dom->aperture[0]->bitmap);
 701        }
 702
 703        return 0;
 704}
 705
 706/*
 707 * Inits the unity mappings required for a specific device
 708 */
 709static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
 710                                          u16 devid)
 711{
 712        struct unity_map_entry *e;
 713        int ret;
 714
 715        list_for_each_entry(e, &amd_iommu_unity_map, list) {
 716                if (!(devid >= e->devid_start && devid <= e->devid_end))
 717                        continue;
 718                ret = dma_ops_unity_map(dma_dom, e);
 719                if (ret)
 720                        return ret;
 721        }
 722
 723        return 0;
 724}
 725
 726/****************************************************************************
 727 *
 728 * The next functions belong to the address allocator for the dma_ops
 729 * interface functions. They work like the allocators in the other IOMMU
  730 * drivers. It's basically a bitmap which marks the allocated pages in
 731 * the aperture. Maybe it could be enhanced in the future to a more
 732 * efficient allocator.
 733 *
 734 ****************************************************************************/
 735
 736/*
 737 * The address allocator core functions.
 738 *
 739 * called with domain->lock held
 740 */
 741
 742/*
 743 * This function checks if there is a PTE for a given dma address. If
 744 * there is one, it returns the pointer to it.
 745 */
 746static u64 *fetch_pte(struct protection_domain *domain,
 747                      unsigned long address, int map_size)
 748{
 749        int level;
 750        u64 *pte;
 751
 752        level =  domain->mode - 1;
 753        pte   = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
 754
 755        while (level > map_size) {
 756                if (!IOMMU_PTE_PRESENT(*pte))
 757                        return NULL;
 758
 759                level -= 1;
 760
 761                pte = IOMMU_PTE_PAGE(*pte);
 762                pte = &pte[PM_LEVEL_INDEX(level, address)];
 763
 764                if ((PM_PTE_LEVEL(*pte) == 0) && level != map_size) {
 765                        pte = NULL;
 766                        break;
 767                }
 768        }
 769
 770        return pte;
 771}
 772
 773/*
 774 * This function is used to add a new aperture range to an existing
 775 * aperture in case of dma_ops domain allocation or address allocation
 776 * failure.
 777 */
 778static int alloc_new_range(struct amd_iommu *iommu,
 779                           struct dma_ops_domain *dma_dom,
 780                           bool populate, gfp_t gfp)
 781{
 782        int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT;
 783        int i;
 784
 785#ifdef CONFIG_IOMMU_STRESS
 786        populate = false;
 787#endif
 788
 789        if (index >= APERTURE_MAX_RANGES)
 790                return -ENOMEM;
 791
 792        dma_dom->aperture[index] = kzalloc(sizeof(struct aperture_range), gfp);
 793        if (!dma_dom->aperture[index])
 794                return -ENOMEM;
 795
 796        dma_dom->aperture[index]->bitmap = (void *)get_zeroed_page(gfp);
 797        if (!dma_dom->aperture[index]->bitmap)
 798                goto out_free;
 799
 800        dma_dom->aperture[index]->offset = dma_dom->aperture_size;
 801
 802        if (populate) {
 803                unsigned long address = dma_dom->aperture_size;
 804                int i, num_ptes = APERTURE_RANGE_PAGES / 512;
 805                u64 *pte, *pte_page;
 806
 807                for (i = 0; i < num_ptes; ++i) {
 808                        pte = alloc_pte(&dma_dom->domain, address, PM_MAP_4k,
 809                                        &pte_page, gfp);
 810                        if (!pte)
 811                                goto out_free;
 812
 813                        dma_dom->aperture[index]->pte_pages[i] = pte_page;
 814
 815                        address += APERTURE_RANGE_SIZE / 64;
 816                }
 817        }
 818
 819        dma_dom->aperture_size += APERTURE_RANGE_SIZE;
 820
  821        /* Initialize the exclusion range if necessary */
 822        if (iommu->exclusion_start &&
 823            iommu->exclusion_start >= dma_dom->aperture[index]->offset &&
 824            iommu->exclusion_start < dma_dom->aperture_size) {
 825                unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
 826                int pages = iommu_num_pages(iommu->exclusion_start,
 827                                            iommu->exclusion_length,
 828                                            PAGE_SIZE);
 829                dma_ops_reserve_addresses(dma_dom, startpage, pages);
 830        }
 831
 832        /*
 833         * Check for areas already mapped as present in the new aperture
 834         * range and mark those pages as reserved in the allocator. Such
 835         * mappings may already exist as a result of requested unity
 836         * mappings for devices.
 837         */
 838        for (i = dma_dom->aperture[index]->offset;
 839             i < dma_dom->aperture_size;
 840             i += PAGE_SIZE) {
 841                u64 *pte = fetch_pte(&dma_dom->domain, i, PM_MAP_4k);
 842                if (!pte || !IOMMU_PTE_PRESENT(*pte))
 843                        continue;
 844
  845                dma_ops_reserve_addresses(dma_dom, i >> PAGE_SHIFT, 1);
 846        }
 847
 848        update_domain(&dma_dom->domain);
 849
 850        return 0;
 851
 852out_free:
 853        update_domain(&dma_dom->domain);
 854
 855        free_page((unsigned long)dma_dom->aperture[index]->bitmap);
 856
 857        kfree(dma_dom->aperture[index]);
 858        dma_dom->aperture[index] = NULL;
 859
 860        return -ENOMEM;
 861}
 862
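     /*
      * Scan the aperture ranges, beginning at the range containing 'start',
      * for a free area of 'pages' pages below dma_mask. Returns the
      * allocated DMA address or -1 on failure.
      */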
 863static unsigned long dma_ops_area_alloc(struct device *dev,
 864                                        struct dma_ops_domain *dom,
 865                                        unsigned int pages,
 866                                        unsigned long align_mask,
 867                                        u64 dma_mask,
 868                                        unsigned long start)
 869{
 870        unsigned long next_bit = dom->next_address % APERTURE_RANGE_SIZE;
 871        int max_index = dom->aperture_size >> APERTURE_RANGE_SHIFT;
 872        int i = start >> APERTURE_RANGE_SHIFT;
 873        unsigned long boundary_size;
 874        unsigned long address = -1;
 875        unsigned long limit;
 876
 877        next_bit >>= PAGE_SHIFT;
 878
 879        boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
 880                        PAGE_SIZE) >> PAGE_SHIFT;
 881
  882        for (; i < max_index; ++i) {
 883                unsigned long offset = dom->aperture[i]->offset >> PAGE_SHIFT;
 884
 885                if (dom->aperture[i]->offset >= dma_mask)
 886                        break;
 887
 888                limit = iommu_device_max_index(APERTURE_RANGE_PAGES, offset,
 889                                               dma_mask >> PAGE_SHIFT);
 890
 891                address = iommu_area_alloc(dom->aperture[i]->bitmap,
 892                                           limit, next_bit, pages, 0,
 893                                            boundary_size, align_mask);
 894                if (address != -1) {
 895                        address = dom->aperture[i]->offset +
 896                                  (address << PAGE_SHIFT);
 897                        dom->next_address = address + (pages << PAGE_SHIFT);
 898                        break;
 899                }
 900
 901                next_bit = 0;
 902        }
 903
 904        return address;
 905}
 906
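     /*
      * Allocate a range of DMA addresses. The search starts at next_address
      * and wraps around to the beginning of the aperture (forcing an IO/TLB
      * flush) if no space is found there.
      */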
 907static unsigned long dma_ops_alloc_addresses(struct device *dev,
 908                                             struct dma_ops_domain *dom,
 909                                             unsigned int pages,
 910                                             unsigned long align_mask,
 911                                             u64 dma_mask)
 912{
 913        unsigned long address;
 914
 915#ifdef CONFIG_IOMMU_STRESS
 916        dom->next_address = 0;
 917        dom->need_flush = true;
 918#endif
 919
 920        address = dma_ops_area_alloc(dev, dom, pages, align_mask,
 921                                     dma_mask, dom->next_address);
 922
 923        if (address == -1) {
 924                dom->next_address = 0;
 925                address = dma_ops_area_alloc(dev, dom, pages, align_mask,
 926                                             dma_mask, 0);
 927                dom->need_flush = true;
 928        }
 929
 930        if (unlikely(address == -1))
 931                address = bad_dma_address;
 932
 933        WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size);
 934
 935        return address;
 936}
 937
 938/*
 939 * The address free function.
 940 *
 941 * called with domain->lock held
 942 */
 943static void dma_ops_free_addresses(struct dma_ops_domain *dom,
 944                                   unsigned long address,
 945                                   unsigned int pages)
 946{
 947        unsigned i = address >> APERTURE_RANGE_SHIFT;
 948        struct aperture_range *range = dom->aperture[i];
 949
 950        BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL);
 951
 952#ifdef CONFIG_IOMMU_STRESS
 953        if (i < 4)
 954                return;
 955#endif
 956
 957        if (address >= dom->next_address)
 958                dom->need_flush = true;
 959
 960        address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT;
 961
 962        iommu_area_free(range->bitmap, address, pages);
 963
 964}
 965
 966/****************************************************************************
 967 *
 968 * The next functions belong to the domain allocation. A domain is
 969 * allocated for every IOMMU as the default domain. If device isolation
  970 * is enabled, every device gets its own domain. The most important thing
 971 * about domains is the page table mapping the DMA address space they
 972 * contain.
 973 *
 974 ****************************************************************************/
 975
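     /* Allocate a free protection domain id from the global bitmap, 0 on failure */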
 976static u16 domain_id_alloc(void)
 977{
 978        unsigned long flags;
 979        int id;
 980
 981        write_lock_irqsave(&amd_iommu_devtable_lock, flags);
 982        id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID);
 983        BUG_ON(id == 0);
 984        if (id > 0 && id < MAX_DOMAIN_ID)
 985                __set_bit(id, amd_iommu_pd_alloc_bitmap);
 986        else
 987                id = 0;
 988        write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
 989
 990        return id;
 991}
 992
 993static void domain_id_free(int id)
 994{
 995        unsigned long flags;
 996
 997        write_lock_irqsave(&amd_iommu_devtable_lock, flags);
 998        if (id > 0 && id < MAX_DOMAIN_ID)
 999                __clear_bit(id, amd_iommu_pd_alloc_bitmap);
1000        write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1001}
1002
1003/*
1004 * Used to reserve address ranges in the aperture (e.g. for exclusion
 1005 * ranges).
1006 */
1007static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
1008                                      unsigned long start_page,
1009                                      unsigned int pages)
1010{
1011        unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT;
1012
1013        if (start_page + pages > last_page)
1014                pages = last_page - start_page;
1015
1016        for (i = start_page; i < start_page + pages; ++i) {
1017                int index = i / APERTURE_RANGE_PAGES;
1018                int page  = i % APERTURE_RANGE_PAGES;
1019                __set_bit(page, dom->aperture[index]->bitmap);
1020        }
1021}
1022
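     /* Free the page table of a protection domain (assumes a 3-level layout) */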
1023static void free_pagetable(struct protection_domain *domain)
1024{
1025        int i, j;
1026        u64 *p1, *p2, *p3;
1027
1028        p1 = domain->pt_root;
1029
1030        if (!p1)
1031                return;
1032
1033        for (i = 0; i < 512; ++i) {
1034                if (!IOMMU_PTE_PRESENT(p1[i]))
1035                        continue;
1036
1037                p2 = IOMMU_PTE_PAGE(p1[i]);
1038                for (j = 0; j < 512; ++j) {
1039                        if (!IOMMU_PTE_PRESENT(p2[j]))
1040                                continue;
1041                        p3 = IOMMU_PTE_PAGE(p2[j]);
1042                        free_page((unsigned long)p3);
1043                }
1044
1045                free_page((unsigned long)p2);
1046        }
1047
1048        free_page((unsigned long)p1);
1049
1050        domain->pt_root = NULL;
1051}
1052
1053/*
1054 * Free a domain, only used if something went wrong in the
1055 * allocation path and we need to free an already allocated page table
1056 */
1057static void dma_ops_domain_free(struct dma_ops_domain *dom)
1058{
1059        int i;
1060
1061        if (!dom)
1062                return;
1063
1064        free_pagetable(&dom->domain);
1065
1066        for (i = 0; i < APERTURE_MAX_RANGES; ++i) {
1067                if (!dom->aperture[i])
1068                        continue;
1069                free_page((unsigned long)dom->aperture[i]->bitmap);
1070                kfree(dom->aperture[i]);
1071        }
1072
1073        kfree(dom);
1074}
1075
1076/*
1077 * Allocates a new protection domain usable for the dma_ops functions.
 1078 * It also initializes the page table and the address allocator data
1079 * structures required for the dma_ops interface
1080 */
1081static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu)
1082{
1083        struct dma_ops_domain *dma_dom;
1084
1085        dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL);
1086        if (!dma_dom)
1087                return NULL;
1088
1089        spin_lock_init(&dma_dom->domain.lock);
1090
1091        dma_dom->domain.id = domain_id_alloc();
1092        if (dma_dom->domain.id == 0)
1093                goto free_dma_dom;
1094        dma_dom->domain.mode = PAGE_MODE_2_LEVEL;
1095        dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
1096        dma_dom->domain.flags = PD_DMA_OPS_MASK;
1097        dma_dom->domain.priv = dma_dom;
1098        if (!dma_dom->domain.pt_root)
1099                goto free_dma_dom;
1100
1101        dma_dom->need_flush = false;
1102        dma_dom->target_dev = 0xffff;
1103
1104        if (alloc_new_range(iommu, dma_dom, true, GFP_KERNEL))
1105                goto free_dma_dom;
1106
1107        /*
 1108         * Mark the first page as allocated so we never return 0 as a
 1109         * valid dma-address and can thus use 0 as an error value.
1110         */
1111        dma_dom->aperture[0]->bitmap[0] = 1;
1112        dma_dom->next_address = 0;
1113
1114
1115        return dma_dom;
1116
1117free_dma_dom:
1118        dma_ops_domain_free(dma_dom);
1119
1120        return NULL;
1121}
1122
1123/*
1124 * little helper function to check whether a given protection domain is a
1125 * dma_ops domain
1126 */
1127static bool dma_ops_domain(struct protection_domain *domain)
1128{
1129        return domain->flags & PD_DMA_OPS_MASK;
1130}
1131
1132/*
1133 * Find out the protection domain structure for a given PCI device. This
1134 * will give us the pointer to the page table root for example.
1135 */
1136static struct protection_domain *domain_for_device(u16 devid)
1137{
1138        struct protection_domain *dom;
1139        unsigned long flags;
1140
1141        read_lock_irqsave(&amd_iommu_devtable_lock, flags);
1142        dom = amd_iommu_pd_table[devid];
1143        read_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1144
1145        return dom;
1146}
1147
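     /*
      * Write the page table root, paging mode and domain id of the given
      * domain into the device table entry for devid and record the domain in
      * amd_iommu_pd_table.
      */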
1148static void set_dte_entry(u16 devid, struct protection_domain *domain)
1149{
1150        u64 pte_root = virt_to_phys(domain->pt_root);
1151
1152        pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
1153                    << DEV_ENTRY_MODE_SHIFT;
1154        pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
1155
1156        amd_iommu_dev_table[devid].data[2] = domain->id;
1157        amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root);
1158        amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root);
1159
1160        amd_iommu_pd_table[devid] = domain;
1161}
1162
1163/*
 1164 * If a device is not yet associated with a domain, this function assigns
 1165 * it to the domain and makes the assignment visible to the hardware
1166 */
1167static void __attach_device(struct amd_iommu *iommu,
1168                            struct protection_domain *domain,
1169                            u16 devid)
1170{
1171        /* lock domain */
1172        spin_lock(&domain->lock);
1173
1174        /* update DTE entry */
1175        set_dte_entry(devid, domain);
1176
1177        domain->dev_cnt += 1;
1178
1179        /* ready */
1180        spin_unlock(&domain->lock);
1181}
1182
1183/*
 1184 * If a device is not yet associated with a domain, this function assigns
 1185 * it to the domain, taking the device table lock, and flushes the DTE and TLB
1186 */
1187static void attach_device(struct amd_iommu *iommu,
1188                          struct protection_domain *domain,
1189                          u16 devid)
1190{
1191        unsigned long flags;
1192
1193        write_lock_irqsave(&amd_iommu_devtable_lock, flags);
1194        __attach_device(iommu, domain, devid);
1195        write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1196
1197        /*
1198         * We might boot into a crash-kernel here. The crashed kernel
1199         * left the caches in the IOMMU dirty. So we have to flush
1200         * here to evict all dirty stuff.
1201         */
1202        iommu_queue_inv_dev_entry(iommu, devid);
1203        iommu_flush_tlb_pde(iommu, domain->id);
1204}
1205
1206/*
1207 * Removes a device from a protection domain (unlocked)
1208 */
1209static void __detach_device(struct protection_domain *domain, u16 devid)
1210{
1211
1212        /* lock domain */
1213        spin_lock(&domain->lock);
1214
1215        /* remove domain from the lookup table */
1216        amd_iommu_pd_table[devid] = NULL;
1217
1218        /* remove entry from the device table seen by the hardware */
1219        amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV;
1220        amd_iommu_dev_table[devid].data[1] = 0;
1221        amd_iommu_dev_table[devid].data[2] = 0;
1222
1223        amd_iommu_apply_erratum_63(devid);
1224
1225        /* decrease reference counter */
1226        domain->dev_cnt -= 1;
1227
1228        /* ready */
1229        spin_unlock(&domain->lock);
1230
1231        /*
1232         * If we run in passthrough mode the device must be assigned to the
1233         * passthrough domain if it is detached from any other domain
1234         */
1235        if (iommu_pass_through) {
1236                struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
1237                __attach_device(iommu, pt_domain, devid);
1238        }
1239}
1240
1241/*
1242 * Removes a device from a protection domain (with devtable_lock held)
1243 */
1244static void detach_device(struct protection_domain *domain, u16 devid)
1245{
1246        unsigned long flags;
1247
1248        /* lock device table */
1249        write_lock_irqsave(&amd_iommu_devtable_lock, flags);
1250        __detach_device(domain, devid);
1251        write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1252}
1253
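     /*
      * Bus notifier callback: detaches a device from its domain when the
      * driver is unbound and preallocates a dma_ops domain when a new device
      * is added.
      */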
1254static int device_change_notifier(struct notifier_block *nb,
1255                                  unsigned long action, void *data)
1256{
1257        struct device *dev = data;
1258        struct pci_dev *pdev = to_pci_dev(dev);
1259        u16 devid = calc_devid(pdev->bus->number, pdev->devfn);
1260        struct protection_domain *domain;
1261        struct dma_ops_domain *dma_domain;
1262        struct amd_iommu *iommu;
1263        unsigned long flags;
1264
1265        if (devid > amd_iommu_last_bdf)
1266                goto out;
1267
1268        devid = amd_iommu_alias_table[devid];
1269
1270        iommu = amd_iommu_rlookup_table[devid];
1271        if (iommu == NULL)
1272                goto out;
1273
1274        domain = domain_for_device(devid);
1275
1276        if (domain && !dma_ops_domain(domain))
1277                WARN_ONCE(1, "AMD IOMMU WARNING: device %s already bound "
1278                          "to a non-dma-ops domain\n", dev_name(dev));
1279
1280        switch (action) {
1281        case BUS_NOTIFY_UNBOUND_DRIVER:
1282                if (!domain)
1283                        goto out;
1284                if (iommu_pass_through)
1285                        break;
1286                detach_device(domain, devid);
1287                break;
1288        case BUS_NOTIFY_ADD_DEVICE:
1289                /* allocate a protection domain if a device is added */
1290                dma_domain = find_protection_domain(devid);
1291                if (dma_domain)
1292                        goto out;
1293                dma_domain = dma_ops_domain_alloc(iommu);
1294                if (!dma_domain)
1295                        goto out;
1296                dma_domain->target_dev = devid;
1297
1298                spin_lock_irqsave(&iommu_pd_list_lock, flags);
1299                list_add_tail(&dma_domain->list, &iommu_pd_list);
1300                spin_unlock_irqrestore(&iommu_pd_list_lock, flags);
1301
1302                break;
1303        default:
1304                goto out;
1305        }
1306
1307        iommu_queue_inv_dev_entry(iommu, devid);
1308        iommu_completion_wait(iommu);
1309
1310out:
1311        return 0;
1312}
1313
1314static struct notifier_block device_nb = {
1315        .notifier_call = device_change_notifier,
1316};
1317
1318/*****************************************************************************
1319 *
1320 * The next functions belong to the dma_ops mapping/unmapping code.
1321 *
1322 *****************************************************************************/
1323
1324/*
1325 * This function checks if the driver got a valid device from the caller to
1326 * avoid dereferencing invalid pointers.
1327 */
1328static bool check_device(struct device *dev)
1329{
1330        if (!dev || !dev->dma_mask)
1331                return false;
1332
1333        return true;
1334}
1335
1336/*
1337 * In this function the list of preallocated protection domains is traversed to
1338 * find the domain for a specific device
1339 */
1340static struct dma_ops_domain *find_protection_domain(u16 devid)
1341{
1342        struct dma_ops_domain *entry, *ret = NULL;
1343        unsigned long flags;
1344
1345        if (list_empty(&iommu_pd_list))
1346                return NULL;
1347
1348        spin_lock_irqsave(&iommu_pd_list_lock, flags);
1349
1350        list_for_each_entry(entry, &iommu_pd_list, list) {
1351                if (entry->target_dev == devid) {
1352                        ret = entry;
1353                        break;
1354                }
1355        }
1356
1357        spin_unlock_irqrestore(&iommu_pd_list_lock, flags);
1358
1359        return ret;
1360}
1361
1362/*
1363 * In the dma_ops path we only have the struct device. This function
1364 * finds the corresponding IOMMU, the protection domain and the
1365 * requestor id for a given device.
1366 * If the device is not yet associated with a domain this is also done
1367 * in this function.
1368 */
1369static int get_device_resources(struct device *dev,
1370                                struct amd_iommu **iommu,
1371                                struct protection_domain **domain,
1372                                u16 *bdf)
1373{
1374        struct dma_ops_domain *dma_dom;
1375        struct pci_dev *pcidev;
1376        u16 _bdf;
1377
1378        *iommu = NULL;
1379        *domain = NULL;
1380        *bdf = 0xffff;
1381
1382        if (dev->bus != &pci_bus_type)
1383                return 0;
1384
1385        pcidev = to_pci_dev(dev);
1386        _bdf = calc_devid(pcidev->bus->number, pcidev->devfn);
1387
1388        /* device not translated by any IOMMU in the system? */
1389        if (_bdf > amd_iommu_last_bdf)
1390                return 0;
1391
1392        *bdf = amd_iommu_alias_table[_bdf];
1393
1394        *iommu = amd_iommu_rlookup_table[*bdf];
1395        if (*iommu == NULL)
1396                return 0;
1397        *domain = domain_for_device(*bdf);
1398        if (*domain == NULL) {
1399                dma_dom = find_protection_domain(*bdf);
1400                if (!dma_dom)
1401                        dma_dom = (*iommu)->default_dom;
1402                *domain = &dma_dom->domain;
1403                attach_device(*iommu, *domain, *bdf);
1404                DUMP_printk("Using protection domain %d for device %s\n",
1405                            (*domain)->id, dev_name(dev));
1406        }
1407
1408        if (domain_for_device(_bdf) == NULL)
1409                attach_device(*iommu, *domain, _bdf);
1410
1411        return 1;
1412}
1413
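     /*
      * Re-write the device table entries of all devices assigned to this
      * domain, e.g. after the page table root or paging mode has changed.
      */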
1414static void update_device_table(struct protection_domain *domain)
1415{
1416        unsigned long flags;
1417        int i;
1418
1419        for (i = 0; i <= amd_iommu_last_bdf; ++i) {
1420                if (amd_iommu_pd_table[i] != domain)
1421                        continue;
1422                write_lock_irqsave(&amd_iommu_devtable_lock, flags);
1423                set_dte_entry(i, domain);
1424                write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1425        }
1426}
1427
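     /*
      * Propagate pending page table changes: update the device table entries
      * of the domain and flush the DTEs and the IO/TLB.
      */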
1428static void update_domain(struct protection_domain *domain)
1429{
1430        if (!domain->updated)
1431                return;
1432
1433        update_device_table(domain);
1434        flush_devices_by_domain(domain);
1435        iommu_flush_domain(domain->id);
1436
1437        domain->updated = false;
1438}
1439
1440/*
1441 * This function is used to add another level to an IO page table. Adding
1442 * another level increases the size of the address space by 9 bits to a size up
1443 * to 64 bits.
1444 */
1445static bool increase_address_space(struct protection_domain *domain,
1446                                   gfp_t gfp)
1447{
1448        u64 *pte;
1449
1450        if (domain->mode == PAGE_MODE_6_LEVEL)
1451                /* address space already 64 bit large */
1452                return false;
1453
1454        pte = (void *)get_zeroed_page(gfp);
1455        if (!pte)
1456                return false;
1457
1458        *pte             = PM_LEVEL_PDE(domain->mode,
1459                                        virt_to_phys(domain->pt_root));
1460        domain->pt_root  = pte;
1461        domain->mode    += 1;
1462        domain->updated  = true;
1463
1464        return true;
1465}
1466
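     /*
      * Walk the page table for the given address down to level end_lvl and
      * allocate missing page table pages on the way. The address space is
      * grown first if the address is not covered by the current paging mode.
      * Returns a pointer to the PTE at end_lvl or NULL on allocation failure.
      */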
1467static u64 *alloc_pte(struct protection_domain *domain,
1468                      unsigned long address,
1469                      int end_lvl,
1470                      u64 **pte_page,
1471                      gfp_t gfp)
1472{
1473        u64 *pte, *page;
1474        int level;
1475
1476        while (address > PM_LEVEL_SIZE(domain->mode))
1477                increase_address_space(domain, gfp);
1478
1479        level =  domain->mode - 1;
1480        pte   = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
1481
1482        while (level > end_lvl) {
1483                if (!IOMMU_PTE_PRESENT(*pte)) {
1484                        page = (u64 *)get_zeroed_page(gfp);
1485                        if (!page)
1486                                return NULL;
1487                        *pte = PM_LEVEL_PDE(level, virt_to_phys(page));
1488                }
1489
1490                level -= 1;
1491
1492                pte = IOMMU_PTE_PAGE(*pte);
1493
1494                if (pte_page && level == end_lvl)
1495                        *pte_page = pte;
1496
1497                pte = &pte[PM_LEVEL_INDEX(level, address)];
1498        }
1499
1500        return pte;
1501}
1502
1503/*
1504 * This function fetches the PTE for a given address in the aperture
1505 */
1506static u64* dma_ops_get_pte(struct dma_ops_domain *dom,
1507                            unsigned long address)
1508{
1509        struct aperture_range *aperture;
1510        u64 *pte, *pte_page;
1511
1512        aperture = dom->aperture[APERTURE_RANGE_INDEX(address)];
1513        if (!aperture)
1514                return NULL;
1515
1516        pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)];
1517        if (!pte) {
1518                pte = alloc_pte(&dom->domain, address, PM_MAP_4k, &pte_page,
1519                                GFP_ATOMIC);
1520                aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page;
1521        } else
1522                pte += PM_LEVEL_INDEX(0, address);
1523
1524        update_domain(&dom->domain);
1525
1526        return pte;
1527}
1528
1529/*
1530 * This is the generic map function. It maps one 4kb page at paddr to
1531 * the given address in the DMA address space for the domain.
1532 */
1533static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,
1534                                     struct dma_ops_domain *dom,
1535                                     unsigned long address,
1536                                     phys_addr_t paddr,
1537                                     int direction)
1538{
1539        u64 *pte, __pte;
1540
1541        WARN_ON(address > dom->aperture_size);
1542
1543        paddr &= PAGE_MASK;
1544
1545        pte  = dma_ops_get_pte(dom, address);
1546        if (!pte)
1547                return bad_dma_address;
1548
1549        __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC;
1550
1551        if (direction == DMA_TO_DEVICE)
1552                __pte |= IOMMU_PTE_IR;
1553        else if (direction == DMA_FROM_DEVICE)
1554                __pte |= IOMMU_PTE_IW;
1555        else if (direction == DMA_BIDIRECTIONAL)
1556                __pte |= IOMMU_PTE_IR | IOMMU_PTE_IW;
1557
1558        WARN_ON(*pte);
1559
1560        *pte = __pte;
1561
1562        return (dma_addr_t)address;
1563}
1564
1565/*
 1566 * The generic unmapping function for one page in the DMA address space.
1567 */
1568static void dma_ops_domain_unmap(struct amd_iommu *iommu,
1569                                 struct dma_ops_domain *dom,
1570                                 unsigned long address)
1571{
1572        struct aperture_range *aperture;
1573        u64 *pte;
1574
1575        if (address >= dom->aperture_size)
1576                return;
1577
1578        aperture = dom->aperture[APERTURE_RANGE_INDEX(address)];
1579        if (!aperture)
1580                return;
1581
1582        pte  = aperture->pte_pages[APERTURE_PAGE_INDEX(address)];
1583        if (!pte)
1584                return;
1585
1586        pte += PM_LEVEL_INDEX(0, address);
1587
1588        WARN_ON(!*pte);
1589
1590        *pte = 0ULL;
1591}
1592
1593/*
1594 * This function contains common code for mapping of a physically
1595 * contiguous memory region into DMA address space. It is used by all
1596 * mapping functions provided with this IOMMU driver.
1597 * Must be called with the domain lock held.
1598 */
1599static dma_addr_t __map_single(struct device *dev,
1600                               struct amd_iommu *iommu,
1601                               struct dma_ops_domain *dma_dom,
1602                               phys_addr_t paddr,
1603                               size_t size,
1604                               int dir,
1605                               bool align,
1606                               u64 dma_mask)
1607{
1608        dma_addr_t offset = paddr & ~PAGE_MASK;
1609        dma_addr_t address, start, ret;
1610        unsigned int pages;
1611        unsigned long align_mask = 0;
1612        int i;
1613
1614        pages = iommu_num_pages(paddr, size, PAGE_SIZE);
1615        paddr &= PAGE_MASK;
1616
1617        INC_STATS_COUNTER(total_map_requests);
1618
1619        if (pages > 1)
1620                INC_STATS_COUNTER(cross_page);
1621
1622        if (align)
1623                align_mask = (1UL << get_order(size)) - 1;
1624
1625retry:
1626        address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask,
1627                                          dma_mask);
1628        if (unlikely(address == bad_dma_address)) {
1629                /*
1630                 * setting next_address here will let the address
1631                 * allocator only scan the new allocated range in the
1632                 * first run. This is a small optimization.
1633                 */
1634                dma_dom->next_address = dma_dom->aperture_size;
1635
1636                if (alloc_new_range(iommu, dma_dom, false, GFP_ATOMIC))
1637                        goto out;
1638
1639                /*
1640                 * aperture was successfully enlarged by 128 MB, try
1641                 * allocation again
1642                 */
1643                goto retry;
1644        }
1645
1646        start = address;
1647        for (i = 0; i < pages; ++i) {
1648                ret = dma_ops_domain_map(iommu, dma_dom, start, paddr, dir);
1649                if (ret == bad_dma_address)
1650                        goto out_unmap;
1651
1652                paddr += PAGE_SIZE;
1653                start += PAGE_SIZE;
1654        }
1655        address += offset;
1656
1657        ADD_STATS_COUNTER(alloced_io_mem, size);
1658
1659        if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) {
1660                iommu_flush_tlb(iommu, dma_dom->domain.id);
1661                dma_dom->need_flush = false;
1662        } else if (unlikely(iommu_has_npcache(iommu)))
1663                iommu_flush_pages(iommu, dma_dom->domain.id, address, size);
1664
1665out:
1666        return address;
1667
1668out_unmap:
1669
1670        for (--i; i >= 0; --i) {
1671                start -= PAGE_SIZE;
1672                dma_ops_domain_unmap(iommu, dma_dom, start);
1673        }
1674
1675        dma_ops_free_addresses(dma_dom, address, pages);
1676
1677        return bad_dma_address;
1678}
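/*
 * Illustrative sketch (not part of this file): the page/offset arithmetic
 * __map_single() relies on.  iommu_num_pages() comes from
 * <linux/iommu-helper.h>; example_map_pages() is a hypothetical helper
 * used only to show the numbers.
 */
#include <linux/iommu-helper.h>
#include <asm/page.h>

static unsigned long example_map_pages(phys_addr_t paddr, size_t size)
{
	/*
	 * e.g. paddr = 0x12345, size = 0x2000 (8 KiB):
	 * the in-page offset is 0x345 and the buffer touches three
	 * 4 KiB pages (0x12000, 0x13000, 0x14000), so three IOMMU PTEs
	 * are written and the caller gets back the allocated IOVA + 0x345.
	 */
	return iommu_num_pages(paddr, size, PAGE_SIZE);
}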
1679
1680/*
1681 * Does the reverse of the __map_single function. Must be called with
1682 * the domain lock held too
1683 */
1684static void __unmap_single(struct amd_iommu *iommu,
1685                           struct dma_ops_domain *dma_dom,
1686                           dma_addr_t dma_addr,
1687                           size_t size,
1688                           int dir)
1689{
1690        dma_addr_t i, start;
1691        unsigned int pages;
1692
1693        if ((dma_addr == bad_dma_address) ||
1694            (dma_addr + size > dma_dom->aperture_size))
1695                return;
1696
1697        pages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
1698        dma_addr &= PAGE_MASK;
1699        start = dma_addr;
1700
1701        for (i = 0; i < pages; ++i) {
1702                dma_ops_domain_unmap(iommu, dma_dom, start);
1703                start += PAGE_SIZE;
1704        }
1705
1706        SUB_STATS_COUNTER(alloced_io_mem, size);
1707
1708        dma_ops_free_addresses(dma_dom, dma_addr, pages);
1709
1710        if (amd_iommu_unmap_flush || dma_dom->need_flush) {
1711                iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size);
1712                dma_dom->need_flush = false;
1713        }
1714}
1715
1716/*
1717 * The exported map_page function for dma_ops.
1718 */
1719static dma_addr_t map_page(struct device *dev, struct page *page,
1720                           unsigned long offset, size_t size,
1721                           enum dma_data_direction dir,
1722                           struct dma_attrs *attrs)
1723{
1724        unsigned long flags;
1725        struct amd_iommu *iommu;
1726        struct protection_domain *domain;
1727        u16 devid;
1728        dma_addr_t addr;
1729        u64 dma_mask;
1730        phys_addr_t paddr = page_to_phys(page) + offset;
1731
1732        INC_STATS_COUNTER(cnt_map_single);
1733
1734        if (!check_device(dev))
1735                return bad_dma_address;
1736
1737        dma_mask = *dev->dma_mask;
1738
1739        get_device_resources(dev, &iommu, &domain, &devid);
1740
1741        if (iommu == NULL || domain == NULL)
1742                /* device not handled by any AMD IOMMU */
1743                return (dma_addr_t)paddr;
1744
1745        if (!dma_ops_domain(domain))
1746                return bad_dma_address;
1747
1748        spin_lock_irqsave(&domain->lock, flags);
1749        addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false,
1750                            dma_mask);
1751        if (addr == bad_dma_address)
1752                goto out;
1753
1754        iommu_completion_wait(iommu);
1755
1756out:
1757        spin_unlock_irqrestore(&domain->lock, flags);
1758
1759        return addr;
1760}
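/*
 * Illustrative sketch (hypothetical driver code, not part of this file):
 * how a PCI driver ends up in map_page()/unmap_page() above through the
 * generic DMA API.  'pdev' and 'buf' are assumptions for the example.
 */
#include <linux/pci.h>
#include <linux/dma-mapping.h>

static int example_single_mapping(struct pci_dev *pdev, void *buf, size_t len)
{
	dma_addr_t handle;

	/* dispatches to amd_iommu_dma_ops.map_page via the per-arch dma_ops */
	handle = dma_map_single(&pdev->dev, buf, len, DMA_TO_DEVICE);
	if (dma_mapping_error(&pdev->dev, handle))
		return -EIO;

	/* ... program the device with 'handle', wait for the DMA ... */

	dma_unmap_single(&pdev->dev, handle, len, DMA_TO_DEVICE);
	return 0;
}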
1761
1762/*
1763 * The exported unmap_page function for dma_ops.
1764 */
1765static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
1766                       enum dma_data_direction dir, struct dma_attrs *attrs)
1767{
1768        unsigned long flags;
1769        struct amd_iommu *iommu;
1770        struct protection_domain *domain;
1771        u16 devid;
1772
1773        INC_STATS_COUNTER(cnt_unmap_single);
1774
1775        if (!check_device(dev) ||
1776            !get_device_resources(dev, &iommu, &domain, &devid))
1777                /* device not handled by any AMD IOMMU */
1778                return;
1779
1780        if (!dma_ops_domain(domain))
1781                return;
1782
1783        spin_lock_irqsave(&domain->lock, flags);
1784
1785        __unmap_single(iommu, domain->priv, dma_addr, size, dir);
1786
1787        iommu_completion_wait(iommu);
1788
1789        spin_unlock_irqrestore(&domain->lock, flags);
1790}
1791
1792/*
1793 * This is a special map_sg function which is used if we have to map a
1794 * device that is not handled by an AMD IOMMU in the system.
1795 */
1796static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist,
1797                           int nelems, int dir)
1798{
1799        struct scatterlist *s;
1800        int i;
1801
1802        for_each_sg(sglist, s, nelems, i) {
1803                s->dma_address = (dma_addr_t)sg_phys(s);
1804                s->dma_length  = s->length;
1805        }
1806
1807        return nelems;
1808}
1809
1810/*
1811 * The exported map_sg function for dma_ops (handles scatter-gather
1812 * lists).
1813 */
1814static int map_sg(struct device *dev, struct scatterlist *sglist,
1815                  int nelems, enum dma_data_direction dir,
1816                  struct dma_attrs *attrs)
1817{
1818        unsigned long flags;
1819        struct amd_iommu *iommu;
1820        struct protection_domain *domain;
1821        u16 devid;
1822        int i;
1823        struct scatterlist *s;
1824        phys_addr_t paddr;
1825        int mapped_elems = 0;
1826        u64 dma_mask;
1827
1828        INC_STATS_COUNTER(cnt_map_sg);
1829
1830        if (!check_device(dev))
1831                return 0;
1832
1833        dma_mask = *dev->dma_mask;
1834
1835        get_device_resources(dev, &iommu, &domain, &devid);
1836
1837        if (!iommu || !domain)
1838                return map_sg_no_iommu(dev, sglist, nelems, dir);
1839
1840        if (!dma_ops_domain(domain))
1841                return 0;
1842
1843        spin_lock_irqsave(&domain->lock, flags);
1844
1845        for_each_sg(sglist, s, nelems, i) {
1846                paddr = sg_phys(s);
1847
1848                s->dma_address = __map_single(dev, iommu, domain->priv,
1849                                              paddr, s->length, dir, false,
1850                                              dma_mask);
1851
1852                if (s->dma_address) {
1853                        s->dma_length = s->length;
1854                        mapped_elems++;
1855                } else
1856                        goto unmap;
1857        }
1858
1859        iommu_completion_wait(iommu);
1860
1861out:
1862        spin_unlock_irqrestore(&domain->lock, flags);
1863
1864        return mapped_elems;
1865unmap:
1866        for_each_sg(sglist, s, mapped_elems, i) {
1867                if (s->dma_address)
1868                        __unmap_single(iommu, domain->priv, s->dma_address,
1869                                       s->dma_length, dir);
1870                s->dma_address = s->dma_length = 0;
1871        }
1872
1873        mapped_elems = 0;
1874
1875        goto out;
1876}
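/*
 * Illustrative sketch (hypothetical driver code): a scatterlist mapping
 * that ends up in map_sg()/unmap_sg() above.  'dev', 'buf0' and 'buf1'
 * are assumptions for the example.
 */
#include <linux/scatterlist.h>
#include <linux/dma-mapping.h>
#include <asm/page.h>

static int example_sg_mapping(struct device *dev, void *buf0, void *buf1)
{
	struct scatterlist sg[2];
	int nents;

	sg_init_table(sg, 2);
	sg_set_buf(&sg[0], buf0, PAGE_SIZE);
	sg_set_buf(&sg[1], buf1, PAGE_SIZE);

	nents = dma_map_sg(dev, sg, 2, DMA_FROM_DEVICE);
	if (nents == 0)
		return -EIO;

	/* ... hand sg_dma_address()/sg_dma_len() of each entry to the device ... */

	dma_unmap_sg(dev, sg, 2, DMA_FROM_DEVICE);
	return 0;
}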
1877
1878/*
1879 * The exported unmap_sg function for dma_ops (handles scatter-gather
1880 * lists).
1881 */
1882static void unmap_sg(struct device *dev, struct scatterlist *sglist,
1883                     int nelems, enum dma_data_direction dir,
1884                     struct dma_attrs *attrs)
1885{
1886        unsigned long flags;
1887        struct amd_iommu *iommu;
1888        struct protection_domain *domain;
1889        struct scatterlist *s;
1890        u16 devid;
1891        int i;
1892
1893        INC_STATS_COUNTER(cnt_unmap_sg);
1894
1895        if (!check_device(dev) ||
1896            !get_device_resources(dev, &iommu, &domain, &devid))
1897                return;
1898
1899        if (!dma_ops_domain(domain))
1900                return;
1901
1902        spin_lock_irqsave(&domain->lock, flags);
1903
1904        for_each_sg(sglist, s, nelems, i) {
1905                __unmap_single(iommu, domain->priv, s->dma_address,
1906                               s->dma_length, dir);
1907                s->dma_address = s->dma_length = 0;
1908        }
1909
1910        iommu_completion_wait(iommu);
1911
1912        spin_unlock_irqrestore(&domain->lock, flags);
1913}
1914
1915/*
1916 * The exported alloc_coherent function for dma_ops.
1917 */
1918static void *alloc_coherent(struct device *dev, size_t size,
1919                            dma_addr_t *dma_addr, gfp_t flag)
1920{
1921        unsigned long flags;
1922        void *virt_addr;
1923        struct amd_iommu *iommu;
1924        struct protection_domain *domain;
1925        u16 devid;
1926        phys_addr_t paddr;
1927        u64 dma_mask = dev->coherent_dma_mask;
1928
1929        INC_STATS_COUNTER(cnt_alloc_coherent);
1930
1931        if (!check_device(dev))
1932                return NULL;
1933
1934        if (!get_device_resources(dev, &iommu, &domain, &devid))
1935                flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
1936
1937        flag |= __GFP_ZERO;
1938        virt_addr = (void *)__get_free_pages(flag, get_order(size));
1939        if (!virt_addr)
1940                return NULL;
1941
1942        paddr = virt_to_phys(virt_addr);
1943
1944        if (!iommu || !domain) {
1945                *dma_addr = (dma_addr_t)paddr;
1946                return virt_addr;
1947        }
1948
1949        if (!dma_ops_domain(domain))
1950                goto out_free;
1951
1952        if (!dma_mask)
1953                dma_mask = *dev->dma_mask;
1954
1955        spin_lock_irqsave(&domain->lock, flags);
1956
1957        *dma_addr = __map_single(dev, iommu, domain->priv, paddr,
1958                                 size, DMA_BIDIRECTIONAL, true, dma_mask);
1959
1960        if (*dma_addr == bad_dma_address) {
1961                spin_unlock_irqrestore(&domain->lock, flags);
1962                goto out_free;
1963        }
1964
1965        iommu_completion_wait(iommu);
1966
1967        spin_unlock_irqrestore(&domain->lock, flags);
1968
1969        return virt_addr;
1970
1971out_free:
1972
1973        free_pages((unsigned long)virt_addr, get_order(size));
1974
1975        return NULL;
1976}
1977
1978/*
1979 * The exported free_coherent function for dma_ops.
1980 */
1981static void free_coherent(struct device *dev, size_t size,
1982                          void *virt_addr, dma_addr_t dma_addr)
1983{
1984        unsigned long flags;
1985        struct amd_iommu *iommu;
1986        struct protection_domain *domain;
1987        u16 devid;
1988
1989        INC_STATS_COUNTER(cnt_free_coherent);
1990
1991        if (!check_device(dev))
1992                return;
1993
1994        get_device_resources(dev, &iommu, &domain, &devid);
1995
1996        if (!iommu || !domain)
1997                goto free_mem;
1998
1999        if (!dma_ops_domain(domain))
2000                goto free_mem;
2001
2002        spin_lock_irqsave(&domain->lock, flags);
2003
2004        __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
2005
2006        iommu_completion_wait(iommu);
2007
2008        spin_unlock_irqrestore(&domain->lock, flags);
2009
2010free_mem:
2011        free_pages((unsigned long)virt_addr, get_order(size));
2012}
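/*
 * Illustrative sketch (hypothetical driver code): coherent allocations go
 * through alloc_coherent()/free_coherent() above.  'dev' and the two-page
 * size are assumptions for the example.
 */
#include <linux/dma-mapping.h>
#include <linux/gfp.h>
#include <asm/page.h>

static void *example_coherent_alloc(struct device *dev, dma_addr_t *handle)
{
	/*
	 * Returns a kernel virtual address; *handle is the address the
	 * device must use, which is an IOVA when the device sits behind
	 * the IOMMU.  Release it later with
	 * dma_free_coherent(dev, 2 * PAGE_SIZE, cpu_addr, *handle).
	 */
	return dma_alloc_coherent(dev, 2 * PAGE_SIZE, handle, GFP_KERNEL);
}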
2013
2014/*
2015 * This function is called by the DMA layer to find out if we can handle a
2016 * particular device. It is part of the dma_ops.
2017 */
2018static int amd_iommu_dma_supported(struct device *dev, u64 mask)
2019{
2020        u16 bdf;
2021        struct pci_dev *pcidev;
2022
2023        /* No device or no PCI device */
2024        if (!dev || dev->bus != &pci_bus_type)
2025                return 0;
2026
2027        pcidev = to_pci_dev(dev);
2028
2029        bdf = calc_devid(pcidev->bus->number, pcidev->devfn);
2030
2031        /* Out of our scope? */
2032        if (bdf > amd_iommu_last_bdf)
2033                return 0;
2034
2035        return 1;
2036}
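/*
 * Sketch of the requester-ID packing assumed by the check above:
 * calc_devid() (from amd_iommu_types.h) combines PCI bus and devfn into
 * the 16-bit BDF used to index the device, alias and rlookup tables.
 * example_devid() only illustrates that encoding.
 */
static inline u16 example_devid(u8 bus, u8 devfn)
{
	return ((u16)bus << 8) | devfn;	/* bus in bits 15:8, dev/fn in bits 7:0 */
}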
2037
2038/*
2039 * The function for pre-allocating protection domains.
2040 *
2041 * Once the driver core informs the DMA layer when a driver grabs a
2042 * device we won't need to preallocate the protection domains anymore.
2043 * For now we have to.
2044 */
2045static void prealloc_protection_domains(void)
2046{
2047        struct pci_dev *dev = NULL;
2048        struct dma_ops_domain *dma_dom;
2049        struct amd_iommu *iommu;
2050        u16 devid;
2051
2052        while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
2053                devid = calc_devid(dev->bus->number, dev->devfn);
2054                if (devid > amd_iommu_last_bdf)
2055                        continue;
2056                devid = amd_iommu_alias_table[devid];
2057                if (domain_for_device(devid))
2058                        continue;
2059                iommu = amd_iommu_rlookup_table[devid];
2060                if (!iommu)
2061                        continue;
2062                dma_dom = dma_ops_domain_alloc(iommu);
2063                if (!dma_dom)
2064                        continue;
2065                init_unity_mappings_for_device(dma_dom, devid);
2066                dma_dom->target_dev = devid;
2067
2068                list_add_tail(&dma_dom->list, &iommu_pd_list);
2069        }
2070}
2071
2072static struct dma_map_ops amd_iommu_dma_ops = {
2073        .alloc_coherent = alloc_coherent,
2074        .free_coherent = free_coherent,
2075        .map_page = map_page,
2076        .unmap_page = unmap_page,
2077        .map_sg = map_sg,
2078        .unmap_sg = unmap_sg,
2079        .dma_supported = amd_iommu_dma_supported,
2080};
2081
2082/*
2083 * The function which hooks the AMD IOMMU driver into dma_ops.
2084 */
2085int __init amd_iommu_init_dma_ops(void)
2086{
2087        struct amd_iommu *iommu;
2088        int ret;
2089
2090        /*
2091         * first allocate a default protection domain for every IOMMU we
2092         * found in the system. Devices not assigned to any other
2093         * protection domain will be assigned to the default one.
2094         */
2095        for_each_iommu(iommu) {
2096                iommu->default_dom = dma_ops_domain_alloc(iommu);
2097                if (iommu->default_dom == NULL)
2098                        return -ENOMEM;
2099                iommu->default_dom->domain.flags |= PD_DEFAULT_MASK;
2100                ret = iommu_init_unity_mappings(iommu);
2101                if (ret)
2102                        goto free_domains;
2103        }
2104
2105        /*
2106         * If device isolation is enabled, pre-allocate the protection
2107         * domains for each device.
2108         */
2109        if (amd_iommu_isolate)
2110                prealloc_protection_domains();
2111
2112        iommu_detected = 1;
2113        force_iommu = 1;
2114        bad_dma_address = 0;
2115#ifdef CONFIG_GART_IOMMU
2116        gart_iommu_aperture_disabled = 1;
2117        gart_iommu_aperture = 0;
2118#endif
2119
2120        /* Finally make our dma_ops visible to the drivers */
2121        dma_ops = &amd_iommu_dma_ops;
2122
2123        register_iommu(&amd_iommu_ops);
2124
2125        bus_register_notifier(&pci_bus_type, &device_nb);
2126
2127        amd_iommu_stats_init();
2128
2129        return 0;
2130
2131free_domains:
2132
2133        for_each_iommu(iommu) {
2134                if (iommu->default_dom)
2135                        dma_ops_domain_free(iommu->default_dom);
2136        }
2137
2138        return ret;
2139}
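/*
 * Minimal sketch of the dispatch enabled by 'dma_ops = &amd_iommu_dma_ops'
 * above, assuming the asm-generic DMA helpers of this kernel generation:
 * the generic DMA API fetches the per-device/per-arch ops and calls into
 * the callbacks defined in this file.
 */
#include <linux/dma-mapping.h>

static dma_addr_t example_dispatch(struct device *dev, struct page *page,
				   unsigned long offset, size_t size)
{
	struct dma_map_ops *ops = get_dma_ops(dev);	/* -> &amd_iommu_dma_ops */

	return ops->map_page(dev, page, offset, size, DMA_TO_DEVICE, NULL);
}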
2140
2141/*****************************************************************************
2142 *
2143 * The following functions belong to the exported interface of AMD IOMMU
2144 *
2145 * This interface allows access to lower level functions of the IOMMU
2146 * like protection domain handling and assignment of devices to domains
2147 * which is not possible with the dma_ops interface.
2148 *
2149 *****************************************************************************/
2150
2151static void cleanup_domain(struct protection_domain *domain)
2152{
2153        unsigned long flags;
2154        u16 devid;
2155
2156        write_lock_irqsave(&amd_iommu_devtable_lock, flags);
2157
2158        for (devid = 0; devid <= amd_iommu_last_bdf; ++devid)
2159                if (amd_iommu_pd_table[devid] == domain)
2160                        __detach_device(domain, devid);
2161
2162        write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
2163}
2164
2165static void protection_domain_free(struct protection_domain *domain)
2166{
2167        if (!domain)
2168                return;
2169
2170        if (domain->id)
2171                domain_id_free(domain->id);
2172
2173        kfree(domain);
2174}
2175
2176static struct protection_domain *protection_domain_alloc(void)
2177{
2178        struct protection_domain *domain;
2179
2180        domain = kzalloc(sizeof(*domain), GFP_KERNEL);
2181        if (!domain)
2182                return NULL;
2183
2184        spin_lock_init(&domain->lock);
2185        domain->id = domain_id_alloc();
2186        if (!domain->id)
2187                goto out_err;
2188
2189        return domain;
2190
2191out_err:
2192        kfree(domain);
2193
2194        return NULL;
2195}
2196
2197static int amd_iommu_domain_init(struct iommu_domain *dom)
2198{
2199        struct protection_domain *domain;
2200
2201        domain = protection_domain_alloc();
2202        if (!domain)
2203                goto out_free;
2204
2205        domain->mode    = PAGE_MODE_3_LEVEL;
2206        domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL);
2207        if (!domain->pt_root)
2208                goto out_free;
2209
2210        dom->priv = domain;
2211
2212        return 0;
2213
2214out_free:
2215        protection_domain_free(domain);
2216
2217        return -ENOMEM;
2218}
2219
2220static void amd_iommu_domain_destroy(struct iommu_domain *dom)
2221{
2222        struct protection_domain *domain = dom->priv;
2223
2224        if (!domain)
2225                return;
2226
2227        if (domain->dev_cnt > 0)
2228                cleanup_domain(domain);
2229
2230        BUG_ON(domain->dev_cnt != 0);
2231
2232        free_pagetable(domain);
2233
2234        domain_id_free(domain->id);
2235
2236        kfree(domain);
2237
2238        dom->priv = NULL;
2239}
2240
2241static void amd_iommu_detach_device(struct iommu_domain *dom,
2242                                    struct device *dev)
2243{
2244        struct protection_domain *domain = dom->priv;
2245        struct amd_iommu *iommu;
2246        struct pci_dev *pdev;
2247        u16 devid;
2248
2249        if (dev->bus != &pci_bus_type)
2250                return;
2251
2252        pdev = to_pci_dev(dev);
2253
2254        devid = calc_devid(pdev->bus->number, pdev->devfn);
2255
2256        if (devid > 0)
2257                detach_device(domain, devid);
2258
2259        iommu = amd_iommu_rlookup_table[devid];
2260        if (!iommu)
2261                return;
2262
2263        iommu_queue_inv_dev_entry(iommu, devid);
2264        iommu_completion_wait(iommu);
2265}
2266
2267static int amd_iommu_attach_device(struct iommu_domain *dom,
2268                                   struct device *dev)
2269{
2270        struct protection_domain *domain = dom->priv;
2271        struct protection_domain *old_domain;
2272        struct amd_iommu *iommu;
2273        struct pci_dev *pdev;
2274        u16 devid;
2275
2276        if (dev->bus != &pci_bus_type)
2277                return -EINVAL;
2278
2279        pdev = to_pci_dev(dev);
2280
2281        devid = calc_devid(pdev->bus->number, pdev->devfn);
2282
2283        if (devid >= amd_iommu_last_bdf ||
2284                        devid != amd_iommu_alias_table[devid])
2285                return -EINVAL;
2286
2287        iommu = amd_iommu_rlookup_table[devid];
2288        if (!iommu)
2289                return -EINVAL;
2290
2291        old_domain = domain_for_device(devid);
2292        if (old_domain)
2293                detach_device(old_domain, devid);
2294
2295        attach_device(iommu, domain, devid);
2296
2297        iommu_completion_wait(iommu);
2298
2299        return 0;
2300}
2301
2302static int amd_iommu_map_range(struct iommu_domain *dom,
2303                               unsigned long iova, phys_addr_t paddr,
2304                               size_t size, int iommu_prot)
2305{
2306        struct protection_domain *domain = dom->priv;
2307        unsigned long i,  npages = iommu_num_pages(paddr, size, PAGE_SIZE);
2308        int prot = 0;
2309        int ret;
2310
2311        if (iommu_prot & IOMMU_READ)
2312                prot |= IOMMU_PROT_IR;
2313        if (iommu_prot & IOMMU_WRITE)
2314                prot |= IOMMU_PROT_IW;
2315
2316        iova  &= PAGE_MASK;
2317        paddr &= PAGE_MASK;
2318
2319        for (i = 0; i < npages; ++i) {
2320                ret = iommu_map_page(domain, iova, paddr, prot, PM_MAP_4k);
2321                if (ret)
2322                        return ret;
2323
2324                iova  += PAGE_SIZE;
2325                paddr += PAGE_SIZE;
2326        }
2327
2328        return 0;
2329}
2330
2331static void amd_iommu_unmap_range(struct iommu_domain *dom,
2332                                  unsigned long iova, size_t size)
2333{
2334
2335        struct protection_domain *domain = dom->priv;
2336        unsigned long i,  npages = iommu_num_pages(iova, size, PAGE_SIZE);
2337
2338        iova  &= PAGE_MASK;
2339
2340        for (i = 0; i < npages; ++i) {
2341                iommu_unmap_page(domain, iova, PM_MAP_4k);
2342                iova  += PAGE_SIZE;
2343        }
2344
2345        iommu_flush_domain(domain->id);
2346}
2347
2348static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
2349                                          unsigned long iova)
2350{
2351        struct protection_domain *domain = dom->priv;
2352        unsigned long offset = iova & ~PAGE_MASK;
2353        phys_addr_t paddr;
2354        u64 *pte;
2355
2356        pte = fetch_pte(domain, iova, PM_MAP_4k);
2357
2358        if (!pte || !IOMMU_PTE_PRESENT(*pte))
2359                return 0;
2360
2361        paddr  = *pte & IOMMU_PAGE_MASK;
2362        paddr |= offset;
2363
2364        return paddr;
2365}
2366
2367static int amd_iommu_domain_has_cap(struct iommu_domain *domain,
2368                                    unsigned long cap)
2369{
2370        return 0;
2371}
2372
2373static struct iommu_ops amd_iommu_ops = {
2374        .domain_init = amd_iommu_domain_init,
2375        .domain_destroy = amd_iommu_domain_destroy,
2376        .attach_dev = amd_iommu_attach_device,
2377        .detach_dev = amd_iommu_detach_device,
2378        .map = amd_iommu_map_range,
2379        .unmap = amd_iommu_unmap_range,
2380        .iova_to_phys = amd_iommu_iova_to_phys,
2381        .domain_has_cap = amd_iommu_domain_has_cap,
2382};
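/*
 * Illustrative sketch (hypothetical caller, e.g. a device-assignment path):
 * after register_iommu(&amd_iommu_ops) the generic IOMMU API of this kernel
 * generation dispatches into the callbacks above.  The device pointer and
 * the addresses/sizes are assumptions for the example.
 */
#include <linux/iommu.h>

static int example_iommu_api(struct device *assigned_dev)
{
	struct iommu_domain *dom;
	int ret;

	dom = iommu_domain_alloc();			/* -> amd_iommu_domain_init() */
	if (!dom)
		return -ENOMEM;

	ret = iommu_attach_device(dom, assigned_dev);	/* -> amd_iommu_attach_device() */
	if (ret)
		goto out_free;

	/* map 64 KiB at IOVA 0x100000 to physical address 0x40000000 */
	ret = iommu_map_range(dom, 0x100000, 0x40000000, 0x10000,
			      IOMMU_READ | IOMMU_WRITE);
	if (ret)
		goto out_detach;

	/* ... use the mapping, e.g. iommu_iova_to_phys(dom, 0x100000) ... */

	iommu_unmap_range(dom, 0x100000, 0x10000);
out_detach:
	iommu_detach_device(dom, assigned_dev);
out_free:
	iommu_domain_free(dom);
	return ret;
}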
2383
2384/*****************************************************************************
2385 *
2386 * The next functions do a basic initialization of IOMMU for pass through
2387 * mode
2388 *
2389 * In passthrough mode the IOMMU is initialized and enabled but not used for
2390 * DMA-API translation.
2391 *
2392 *****************************************************************************/
2393
2394int __init amd_iommu_init_passthrough(void)
2395{
2396        struct pci_dev *dev = NULL;
2397        u16 devid, devid2;
2398
2399        /* allocate passthrough domain */
2400        pt_domain = protection_domain_alloc();
2401        if (!pt_domain)
2402                return -ENOMEM;
2403
2404        pt_domain->mode |= PAGE_MODE_NONE;
2405
2406        while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
2407                struct amd_iommu *iommu;
2408
2409                devid = calc_devid(dev->bus->number, dev->devfn);
2410                if (devid > amd_iommu_last_bdf)
2411                        continue;
2412
2413                devid2 = amd_iommu_alias_table[devid];
2414
2415                iommu = amd_iommu_rlookup_table[devid2];
2416                if (!iommu)
2417                        continue;
2418
2419                __attach_device(iommu, pt_domain, devid);
2420                __attach_device(iommu, pt_domain, devid2);
2421        }
2422
2423        pr_info("AMD-Vi: Initialized for Passthrough Mode\n");
2424
2425        return 0;
2426}
2427