linux/drivers/iommu/dmar.c
   1/*
   2 * Copyright (c) 2006, Intel Corporation.
   3 *
   4 * This program is free software; you can redistribute it and/or modify it
   5 * under the terms and conditions of the GNU General Public License,
   6 * version 2, as published by the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope it will be useful, but WITHOUT
   9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  10 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  11 * more details.
  12 *
  13 * You should have received a copy of the GNU General Public License along with
  14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
  15 * Place - Suite 330, Boston, MA 02111-1307 USA.
  16 *
  17 * Copyright (C) 2006-2008 Intel Corporation
  18 * Author: Ashok Raj <ashok.raj@intel.com>
  19 * Author: Shaohua Li <shaohua.li@intel.com>
  20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
  21 *
  22 * This file implements early detection/parsing of Remapping Devices
  23 * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
  24 * tables.
  25 *
  26 * These routines are used by both DMA-remapping and Interrupt-remapping
  27 */
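     /*
      * A DMAR table is a struct acpi_table_dmar header followed by a list
      * of variable-length remapping structures, each starting with a
      * struct acpi_dmar_header: DRHD (remapping hardware units), RMRR
      * (reserved memory regions), ATSR (root ports supporting Address
      * Translation Services) and RHSA (NUMA affinity of a unit).
      * parse_dmar_table() below walks this list.
      */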
  28
  29#include <linux/pci.h>
  30#include <linux/dmar.h>
  31#include <linux/iova.h>
  32#include <linux/intel-iommu.h>
  33#include <linux/timer.h>
  34#include <linux/irq.h>
  35#include <linux/interrupt.h>
  36#include <linux/tboot.h>
  37#include <linux/dmi.h>
  38#include <linux/slab.h>
  39#include <asm/iommu_table.h>
  40
  41#define PREFIX "DMAR: "
  42
   43/* No locks are needed, as the DMA remapping hardware unit
   44 * list is constructed at boot time and hotplug of
   45 * these units is not supported by the architecture.
   46 */
  47LIST_HEAD(dmar_drhd_units);
  48
  49struct acpi_table_header * __initdata dmar_tbl;
  50static acpi_size dmar_tbl_size;
  51
  52static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
  53{
   54        /*
   55         * Add INCLUDE_ALL units at the tail, so that a scan of the
   56         * list finds them at the very end.
   57         */
  58        if (drhd->include_all)
  59                list_add_tail(&drhd->list, &dmar_drhd_units);
  60        else
  61                list_add(&drhd->list, &dmar_drhd_units);
  62}
  63
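     /*
      * Resolve one ACPI device-scope entry to a struct pci_dev.  The entry
      * carries a starting bus number followed by an array of {device,
      * function} path elements; walk the path bridge by bridge and check
      * that the type of device found (endpoint vs. bridge) matches the
      * entry type.
      */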
  64static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
  65                                           struct pci_dev **dev, u16 segment)
  66{
  67        struct pci_bus *bus;
  68        struct pci_dev *pdev = NULL;
  69        struct acpi_dmar_pci_path *path;
  70        int count;
  71
  72        bus = pci_find_bus(segment, scope->bus);
  73        path = (struct acpi_dmar_pci_path *)(scope + 1);
  74        count = (scope->length - sizeof(struct acpi_dmar_device_scope))
  75                / sizeof(struct acpi_dmar_pci_path);
  76
  77        while (count) {
  78                if (pdev)
  79                        pci_dev_put(pdev);
   80                /*
   81                 * Some BIOSes list non-existent devices in the DMAR
   82                 * table; just ignore them.
   83                 */
  84                if (!bus) {
  85                        printk(KERN_WARNING
  86                        PREFIX "Device scope bus [%d] not found\n",
  87                        scope->bus);
  88                        break;
  89                }
  90                pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));
  91                if (!pdev) {
  92                        printk(KERN_WARNING PREFIX
  93                        "Device scope device [%04x:%02x:%02x.%02x] not found\n",
  94                                segment, bus->number, path->dev, path->fn);
  95                        break;
  96                }
   97                path++;
   98                count--;
  99                bus = pdev->subordinate;
 100        }
 101        if (!pdev) {
 102                printk(KERN_WARNING PREFIX
 103                "Device scope device [%04x:%02x:%02x.%02x] not found\n",
 104                segment, scope->bus, path->dev, path->fn);
 105                *dev = NULL;
 106                return 0;
 107        }
  108        if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
  109                        pdev->subordinate) || (scope->entry_type ==
  110                        ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) {
  111                printk(KERN_WARNING PREFIX
  112                        "Device scope type does not match for %s\n",
  113                        pci_name(pdev));
  114                pci_dev_put(pdev);
  115                return -EINVAL;
  116        }
 117        *dev = pdev;
 118        return 0;
 119}
 120
 121int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
 122                                struct pci_dev ***devices, u16 segment)
 123{
 124        struct acpi_dmar_device_scope *scope;
  125        void *tmp = start;
 126        int index;
 127        int ret;
 128
 129        *cnt = 0;
 130        while (start < end) {
 131                scope = start;
 132                if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
 133                    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
 134                        (*cnt)++;
 135                else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC) {
 136                        printk(KERN_WARNING PREFIX
 137                               "Unsupported device scope\n");
 138                }
 139                start += scope->length;
 140        }
 141        if (*cnt == 0)
 142                return 0;
 143
 144        *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL);
 145        if (!*devices)
 146                return -ENOMEM;
 147
 148        start = tmp;
 149        index = 0;
 150        while (start < end) {
 151                scope = start;
 152                if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
 153                    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) {
 154                        ret = dmar_parse_one_dev_scope(scope,
 155                                &(*devices)[index], segment);
 156                        if (ret) {
 157                                kfree(*devices);
 158                                return ret;
 159                        }
  160                        index++;
 161                }
 162                start += scope->length;
 163        }
 164
 165        return 0;
 166}
 167
  168/**
  169 * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
  170 * structure, which uniquely represents one DMA remapping hardware unit
  171 * present in the platform
  172 */
 173static int __init
 174dmar_parse_one_drhd(struct acpi_dmar_header *header)
 175{
 176        struct acpi_dmar_hardware_unit *drhd;
 177        struct dmar_drhd_unit *dmaru;
 178        int ret = 0;
 179
 180        drhd = (struct acpi_dmar_hardware_unit *)header;
 181        dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
 182        if (!dmaru)
 183                return -ENOMEM;
 184
 185        dmaru->hdr = header;
 186        dmaru->reg_base_addr = drhd->address;
 187        dmaru->segment = drhd->segment;
 188        dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
 189
 190        ret = alloc_iommu(dmaru);
 191        if (ret) {
 192                kfree(dmaru);
 193                return ret;
 194        }
 195        dmar_register_drhd_unit(dmaru);
 196        return 0;
 197}
 198
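     /*
      * Resolve the device scope of one DRHD unit into pci_dev pointers.
      * Called from dmar_dev_scope_init() once the PCI subsystem is up;
      * INCLUDE_ALL units carry no explicit scope, so there is nothing to
      * resolve for them.  On failure the unit is unlinked and freed.
      */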
 199static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
 200{
 201        struct acpi_dmar_hardware_unit *drhd;
 202        int ret = 0;
 203
 204        drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
 205
 206        if (dmaru->include_all)
 207                return 0;
 208
 209        ret = dmar_parse_dev_scope((void *)(drhd + 1),
 210                                ((void *)drhd) + drhd->header.length,
 211                                &dmaru->devices_cnt, &dmaru->devices,
 212                                drhd->segment);
 213        if (ret) {
 214                list_del(&dmaru->list);
 215                kfree(dmaru);
 216        }
 217        return ret;
 218}
 219
 220#ifdef CONFIG_ACPI_NUMA
 221static int __init
 222dmar_parse_one_rhsa(struct acpi_dmar_header *header)
 223{
 224        struct acpi_dmar_rhsa *rhsa;
 225        struct dmar_drhd_unit *drhd;
 226
 227        rhsa = (struct acpi_dmar_rhsa *)header;
 228        for_each_drhd_unit(drhd) {
 229                if (drhd->reg_base_addr == rhsa->base_address) {
 230                        int node = acpi_map_pxm_to_node(rhsa->proximity_domain);
 231
 232                        if (!node_online(node))
 233                                node = -1;
 234                        drhd->iommu->node = node;
 235                        return 0;
 236                }
 237        }
 238        WARN_TAINT(
 239                1, TAINT_FIRMWARE_WORKAROUND,
 240                "Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n"
 241                "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
  242                (unsigned long long)rhsa->base_address,
 243                dmi_get_system_info(DMI_BIOS_VENDOR),
 244                dmi_get_system_info(DMI_BIOS_VERSION),
 245                dmi_get_system_info(DMI_PRODUCT_VERSION));
 246
 247        return 0;
 248}
 249#endif
 250
 251static void __init
 252dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
 253{
 254        struct acpi_dmar_hardware_unit *drhd;
 255        struct acpi_dmar_reserved_memory *rmrr;
 256        struct acpi_dmar_atsr *atsr;
 257        struct acpi_dmar_rhsa *rhsa;
 258
 259        switch (header->type) {
 260        case ACPI_DMAR_TYPE_HARDWARE_UNIT:
 261                drhd = container_of(header, struct acpi_dmar_hardware_unit,
 262                                    header);
  263                printk(KERN_INFO PREFIX
 264                        "DRHD base: %#016Lx flags: %#x\n",
 265                        (unsigned long long)drhd->address, drhd->flags);
 266                break;
 267        case ACPI_DMAR_TYPE_RESERVED_MEMORY:
 268                rmrr = container_of(header, struct acpi_dmar_reserved_memory,
 269                                    header);
  270                printk(KERN_INFO PREFIX
 271                        "RMRR base: %#016Lx end: %#016Lx\n",
 272                        (unsigned long long)rmrr->base_address,
 273                        (unsigned long long)rmrr->end_address);
 274                break;
 275        case ACPI_DMAR_TYPE_ATSR:
 276                atsr = container_of(header, struct acpi_dmar_atsr, header);
 277                printk(KERN_INFO PREFIX "ATSR flags: %#x\n", atsr->flags);
 278                break;
 279        case ACPI_DMAR_HARDWARE_AFFINITY:
 280                rhsa = container_of(header, struct acpi_dmar_rhsa, header);
 281                printk(KERN_INFO PREFIX "RHSA base: %#016Lx proximity domain: %#x\n",
 282                       (unsigned long long)rhsa->base_address,
 283                       rhsa->proximity_domain);
 284                break;
 285        }
 286}
 287
 288/**
 289 * dmar_table_detect - checks to see if the platform supports DMAR devices
 290 */
 291static int __init dmar_table_detect(void)
 292{
 293        acpi_status status = AE_OK;
 294
  295        /* if we can find the DMAR table, then there are DMAR devices */
 296        status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0,
 297                                (struct acpi_table_header **)&dmar_tbl,
 298                                &dmar_tbl_size);
 299
 300        if (ACPI_SUCCESS(status) && !dmar_tbl) {
  301                printk(KERN_WARNING PREFIX "Unable to map DMAR\n");
 302                status = AE_NOT_FOUND;
 303        }
 304
 305        return (ACPI_SUCCESS(status) ? 1 : 0);
 306}
 307
 308/**
 309 * parse_dmar_table - parses the DMA reporting table
 310 */
 311static int __init
 312parse_dmar_table(void)
 313{
 314        struct acpi_table_dmar *dmar;
 315        struct acpi_dmar_header *entry_header;
 316        int ret = 0;
 317
  318        /*
  319         * Do it again; the earlier dmar_tbl mapping could have been
  320         * done with the fixed map.
  321         */
 322        dmar_table_detect();
 323
  324        /*
  325         * ACPI tables may not be DMA protected by tboot, so use the DMAR
  326         * copy saved by SINIT in SinitMleData in the TXT heap (DMA protected).
  327         */
 328        dmar_tbl = tboot_get_dmar_table(dmar_tbl);
 329
 330        dmar = (struct acpi_table_dmar *)dmar_tbl;
 331        if (!dmar)
 332                return -ENODEV;
 333
 334        if (dmar->width < PAGE_SHIFT - 1) {
 335                printk(KERN_WARNING PREFIX "Invalid DMAR haw\n");
 336                return -EINVAL;
 337        }
 338
  339        printk(KERN_INFO PREFIX "Host address width %d\n",
 340                dmar->width + 1);
 341
 342        entry_header = (struct acpi_dmar_header *)(dmar + 1);
 343        while (((unsigned long)entry_header) <
 344                        (((unsigned long)dmar) + dmar_tbl->length)) {
 345                /* Avoid looping forever on bad ACPI tables */
 346                if (entry_header->length == 0) {
 347                        printk(KERN_WARNING PREFIX
 348                                "Invalid 0-length structure\n");
 349                        ret = -EINVAL;
 350                        break;
 351                }
 352
 353                dmar_table_print_dmar_entry(entry_header);
 354
 355                switch (entry_header->type) {
 356                case ACPI_DMAR_TYPE_HARDWARE_UNIT:
 357                        ret = dmar_parse_one_drhd(entry_header);
 358                        break;
 359                case ACPI_DMAR_TYPE_RESERVED_MEMORY:
 360                        ret = dmar_parse_one_rmrr(entry_header);
 361                        break;
 362                case ACPI_DMAR_TYPE_ATSR:
 363                        ret = dmar_parse_one_atsr(entry_header);
 364                        break;
 365                case ACPI_DMAR_HARDWARE_AFFINITY:
 366#ifdef CONFIG_ACPI_NUMA
 367                        ret = dmar_parse_one_rhsa(entry_header);
 368#endif
 369                        break;
 370                default:
 371                        printk(KERN_WARNING PREFIX
 372                                "Unknown DMAR structure type %d\n",
 373                                entry_header->type);
 374                        ret = 0; /* for forward compatibility */
 375                        break;
 376                }
 377                if (ret)
 378                        break;
 379
 380                entry_header = ((void *)entry_header + entry_header->length);
 381        }
 382        return ret;
 383}
 384
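     /*
      * Return 1 if @dev, or any of the parent bridges above it, is listed
      * in the @devices scope array.
      */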
 385static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
 386                          struct pci_dev *dev)
 387{
 388        int index;
 389
 390        while (dev) {
 391                for (index = 0; index < cnt; index++)
 392                        if (dev == devices[index])
 393                                return 1;
 394
 395                /* Check our parent */
 396                dev = dev->bus->self;
 397        }
 398
 399        return 0;
 400}
 401
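     /*
      * Map a PCI device (or, for a virtual function, its physical function)
      * to the DRHD unit responsible for it.  Explicitly scoped units are
      * matched first; an INCLUDE_ALL unit in the same segment acts as the
      * fallback, since such units are kept at the tail of the list.
      */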
 402struct dmar_drhd_unit *
 403dmar_find_matched_drhd_unit(struct pci_dev *dev)
 404{
 405        struct dmar_drhd_unit *dmaru = NULL;
 406        struct acpi_dmar_hardware_unit *drhd;
 407
 408        dev = pci_physfn(dev);
 409
 410        list_for_each_entry(dmaru, &dmar_drhd_units, list) {
 411                drhd = container_of(dmaru->hdr,
 412                                    struct acpi_dmar_hardware_unit,
 413                                    header);
 414
 415                if (dmaru->include_all &&
 416                    drhd->segment == pci_domain_nr(dev->bus))
 417                        return dmaru;
 418
 419                if (dmar_pci_device_match(dmaru->devices,
 420                                          dmaru->devices_cnt, dev))
 421                        return dmaru;
 422        }
 423
 424        return NULL;
 425}
 426
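     /*
      * Second-stage initialization, run once after the PCI subsystem is up:
      * resolve the device scopes of all DRHD units as well as the RMRR and
      * ATSR entries into struct pci_dev pointers.
      */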
 427int __init dmar_dev_scope_init(void)
 428{
 429        static int dmar_dev_scope_initialized;
 430        struct dmar_drhd_unit *drhd, *drhd_n;
 431        int ret = -ENODEV;
 432
 433        if (dmar_dev_scope_initialized)
 434                return dmar_dev_scope_initialized;
 435
 436        if (list_empty(&dmar_drhd_units))
 437                goto fail;
 438
 439        list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) {
 440                ret = dmar_parse_dev(drhd);
 441                if (ret)
 442                        goto fail;
 443        }
 444
 445        ret = dmar_parse_rmrr_atsr_dev();
 446        if (ret)
 447                goto fail;
 448
 449        dmar_dev_scope_initialized = 1;
 450        return 0;
 451
 452fail:
 453        dmar_dev_scope_initialized = ret;
 454        return ret;
 455}
 456
 457
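     /*
      * First-stage initialization: parse the DMAR table once and report
      * -ENODEV if no DMA remapping hardware units were found.
      */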
 458int __init dmar_table_init(void)
 459{
 460        static int dmar_table_initialized;
 461        int ret;
 462
 463        if (dmar_table_initialized)
 464                return 0;
 465
 466        dmar_table_initialized = 1;
 467
 468        ret = parse_dmar_table();
 469        if (ret) {
 470                if (ret != -ENODEV)
 471                        printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
 472                return ret;
 473        }
 474
 475        if (list_empty(&dmar_drhd_units)) {
 476                printk(KERN_INFO PREFIX "No DMAR devices found\n");
 477                return -ENODEV;
 478        }
 479
 480        return 0;
 481}
 482
 483static void warn_invalid_dmar(u64 addr, const char *message)
 484{
 485        WARN_TAINT_ONCE(
 486                1, TAINT_FIRMWARE_WORKAROUND,
 487                "Your BIOS is broken; DMAR reported at address %llx%s!\n"
 488                "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
 489                addr, message,
 490                dmi_get_system_info(DMI_BIOS_VENDOR),
 491                dmi_get_system_info(DMI_BIOS_VERSION),
 492                dmi_get_system_info(DMI_PRODUCT_VERSION));
 493}
 494
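     /*
      * Sanity-check each DRHD entry in the raw DMAR table: a zero register
      * base address, or capability registers that read back as all ones,
      * indicates broken firmware.  Returns 1 if the table looks usable,
      * 0 otherwise.
      */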
 495int __init check_zero_address(void)
 496{
 497        struct acpi_table_dmar *dmar;
 498        struct acpi_dmar_header *entry_header;
 499        struct acpi_dmar_hardware_unit *drhd;
 500
 501        dmar = (struct acpi_table_dmar *)dmar_tbl;
 502        entry_header = (struct acpi_dmar_header *)(dmar + 1);
 503
 504        while (((unsigned long)entry_header) <
 505                        (((unsigned long)dmar) + dmar_tbl->length)) {
 506                /* Avoid looping forever on bad ACPI tables */
 507                if (entry_header->length == 0) {
 508                        printk(KERN_WARNING PREFIX
 509                                "Invalid 0-length structure\n");
 510                        return 0;
 511                }
 512
 513                if (entry_header->type == ACPI_DMAR_TYPE_HARDWARE_UNIT) {
 514                        void __iomem *addr;
 515                        u64 cap, ecap;
 516
 517                        drhd = (void *)entry_header;
 518                        if (!drhd->address) {
 519                                warn_invalid_dmar(0, "");
 520                                goto failed;
 521                        }
 522
 523                        addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
  524                        if (!addr) {
  525                                printk(KERN_ERR "IOMMU: can't validate: %llx\n", drhd->address);
 526                                goto failed;
 527                        }
 528                        cap = dmar_readq(addr + DMAR_CAP_REG);
 529                        ecap = dmar_readq(addr + DMAR_ECAP_REG);
 530                        early_iounmap(addr, VTD_PAGE_SIZE);
 531                        if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
 532                                warn_invalid_dmar(drhd->address,
 533                                                  " returns all ones");
 534                                goto failed;
 535                        }
 536                }
 537
 538                entry_header = ((void *)entry_header + entry_header->length);
 539        }
 540        return 1;
 541
 542failed:
 543        return 0;
 544}
 545
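     /*
      * Early detection entry point (run via IOMMU_INIT_POST below): map and
      * validate the DMAR table, mark the IOMMU as detected and hook
      * intel_iommu_init() into x86_init so the real initialization happens
      * later.  The early table mapping is dropped before returning.
      */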
 546int __init detect_intel_iommu(void)
 547{
 548        int ret;
 549
 550        ret = dmar_table_detect();
 551        if (ret)
 552                ret = check_zero_address();
 553        {
 554                struct acpi_table_dmar *dmar;
 555
 556                dmar = (struct acpi_table_dmar *) dmar_tbl;
 557
 558                if (ret && intr_remapping_enabled && cpu_has_x2apic &&
 559                    dmar->flags & 0x1)
 560                        printk(KERN_INFO
 561                               "Queued invalidation will be enabled to support x2apic and Intr-remapping.\n");
 562
 563                if (ret && !no_iommu && !iommu_detected && !dmar_disabled) {
 564                        iommu_detected = 1;
 565                        /* Make sure ACS will be enabled */
 566                        pci_request_acs();
 567                }
 568
 569#ifdef CONFIG_X86
 570                if (ret)
 571                        x86_init.iommu.iommu_init = intel_iommu_init;
 572#endif
 573        }
 574        early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size);
 575        dmar_tbl = NULL;
 576
 577        return ret ? 1 : -ENODEV;
 578}
 579
 580
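     /*
      * Map the register set of one DRHD unit and populate a struct
      * intel_iommu for it: read CAP/ECAP, compute the supported AGAW
      * values, and remap a larger window if the fault and IOTLB registers
      * extend beyond the first page.
      */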
 581int alloc_iommu(struct dmar_drhd_unit *drhd)
 582{
 583        struct intel_iommu *iommu;
 584        int map_size;
 585        u32 ver;
 586        static int iommu_allocated = 0;
 587        int agaw = 0;
 588        int msagaw = 0;
 589
 590        if (!drhd->reg_base_addr) {
 591                warn_invalid_dmar(0, "");
 592                return -EINVAL;
 593        }
 594
 595        iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
 596        if (!iommu)
 597                return -ENOMEM;
 598
 599        iommu->seq_id = iommu_allocated++;
  600        sprintf(iommu->name, "dmar%d", iommu->seq_id);
 601
 602        iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE);
 603        if (!iommu->reg) {
 604                printk(KERN_ERR "IOMMU: can't map the region\n");
 605                goto error;
 606        }
 607        iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
 608        iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
 609
 610        if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
 611                warn_invalid_dmar(drhd->reg_base_addr, " returns all ones");
 612                goto err_unmap;
 613        }
 614
 615        agaw = iommu_calculate_agaw(iommu);
 616        if (agaw < 0) {
 617                printk(KERN_ERR
 618                       "Cannot get a valid agaw for iommu (seq_id = %d)\n",
 619                       iommu->seq_id);
 620                goto err_unmap;
 621        }
 622        msagaw = iommu_calculate_max_sagaw(iommu);
 623        if (msagaw < 0) {
 624                printk(KERN_ERR
 625                        "Cannot get a valid max agaw for iommu (seq_id = %d)\n",
 626                        iommu->seq_id);
 627                goto err_unmap;
 628        }
 629        iommu->agaw = agaw;
 630        iommu->msagaw = msagaw;
 631
 632        iommu->node = -1;
 633
 634        /* the registers might be more than one page */
 635        map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
 636                cap_max_fault_reg_offset(iommu->cap));
 637        map_size = VTD_PAGE_ALIGN(map_size);
 638        if (map_size > VTD_PAGE_SIZE) {
 639                iounmap(iommu->reg);
 640                iommu->reg = ioremap(drhd->reg_base_addr, map_size);
 641                if (!iommu->reg) {
 642                        printk(KERN_ERR "IOMMU: can't map the region\n");
 643                        goto error;
 644                }
 645        }
 646
 647        ver = readl(iommu->reg + DMAR_VER_REG);
 648        pr_info("IOMMU %d: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
 649                iommu->seq_id,
 650                (unsigned long long)drhd->reg_base_addr,
 651                DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
 652                (unsigned long long)iommu->cap,
 653                (unsigned long long)iommu->ecap);
 654
 655        raw_spin_lock_init(&iommu->register_lock);
 656
 657        drhd->iommu = iommu;
 658        return 0;
 659
 660 err_unmap:
 661        iounmap(iommu->reg);
 662 error:
 663        kfree(iommu);
 664        return -1;
 665}
 666
 667void free_iommu(struct intel_iommu *iommu)
 668{
 669        if (!iommu)
 670                return;
 671
 672        free_dmar_iommu(iommu);
 673
 674        if (iommu->reg)
 675                iounmap(iommu->reg);
 676        kfree(iommu);
 677}
 678
 679/*
  680 * Reclaim all the submitted descriptors which have completed their work.
 681 */
 682static inline void reclaim_free_desc(struct q_inval *qi)
 683{
 684        while (qi->desc_status[qi->free_tail] == QI_DONE ||
 685               qi->desc_status[qi->free_tail] == QI_ABORT) {
 686                qi->desc_status[qi->free_tail] = QI_FREE;
 687                qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
 688                qi->free_cnt++;
 689        }
 690}
 691
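     /*
      * Check the fault status register for errors affecting the descriptor
      * at @index: an Invalidation Queue Error on this descriptor is
      * reported as -EINVAL, an Invalidation Time-out Error aborts the
      * pending wait descriptors and asks the caller to retry via -EAGAIN,
      * and an Invalidation Completion Error is simply cleared.
      */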
 692static int qi_check_fault(struct intel_iommu *iommu, int index)
 693{
 694        u32 fault;
 695        int head, tail;
 696        struct q_inval *qi = iommu->qi;
 697        int wait_index = (index + 1) % QI_LENGTH;
 698
 699        if (qi->desc_status[wait_index] == QI_ABORT)
 700                return -EAGAIN;
 701
 702        fault = readl(iommu->reg + DMAR_FSTS_REG);
 703
 704        /*
 705         * If IQE happens, the head points to the descriptor associated
 706         * with the error. No new descriptors are fetched until the IQE
 707         * is cleared.
 708         */
 709        if (fault & DMA_FSTS_IQE) {
 710                head = readl(iommu->reg + DMAR_IQH_REG);
 711                if ((head >> DMAR_IQ_SHIFT) == index) {
 712                        printk(KERN_ERR "VT-d detected invalid descriptor: "
 713                                "low=%llx, high=%llx\n",
 714                                (unsigned long long)qi->desc[index].low,
 715                                (unsigned long long)qi->desc[index].high);
 716                        memcpy(&qi->desc[index], &qi->desc[wait_index],
 717                                        sizeof(struct qi_desc));
 718                        __iommu_flush_cache(iommu, &qi->desc[index],
 719                                        sizeof(struct qi_desc));
 720                        writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
 721                        return -EINVAL;
 722                }
 723        }
 724
 725        /*
 726         * If ITE happens, all pending wait_desc commands are aborted.
 727         * No new descriptors are fetched until the ITE is cleared.
 728         */
 729        if (fault & DMA_FSTS_ITE) {
 730                head = readl(iommu->reg + DMAR_IQH_REG);
 731                head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
 732                head |= 1;
 733                tail = readl(iommu->reg + DMAR_IQT_REG);
 734                tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
 735
 736                writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
 737
 738                do {
 739                        if (qi->desc_status[head] == QI_IN_USE)
 740                                qi->desc_status[head] = QI_ABORT;
 741                        head = (head - 2 + QI_LENGTH) % QI_LENGTH;
 742                } while (head != tail);
 743
 744                if (qi->desc_status[wait_index] == QI_ABORT)
 745                        return -EAGAIN;
 746        }
 747
 748        if (fault & DMA_FSTS_ICE)
 749                writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
 750
 751        return 0;
 752}
 753
 754/*
 755 * Submit the queued invalidation descriptor to the remapping
 756 * hardware unit and wait for its completion.
 757 */
 758int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
 759{
 760        int rc;
 761        struct q_inval *qi = iommu->qi;
 762        struct qi_desc *hw, wait_desc;
 763        int wait_index, index;
 764        unsigned long flags;
 765
 766        if (!qi)
 767                return 0;
 768
 769        hw = qi->desc;
 770
 771restart:
 772        rc = 0;
 773
 774        raw_spin_lock_irqsave(&qi->q_lock, flags);
 775        while (qi->free_cnt < 3) {
 776                raw_spin_unlock_irqrestore(&qi->q_lock, flags);
 777                cpu_relax();
 778                raw_spin_lock_irqsave(&qi->q_lock, flags);
 779        }
 780
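             /*
              * Each submission consumes two slots: the caller's descriptor
              * at @index and a wait descriptor at @wait_index whose status
              * write signals completion of the pair.
              */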
 781        index = qi->free_head;
 782        wait_index = (index + 1) % QI_LENGTH;
 783
 784        qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
 785
 786        hw[index] = *desc;
 787
 788        wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) |
 789                        QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
 790        wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
 791
 792        hw[wait_index] = wait_desc;
 793
 794        __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
 795        __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
 796
 797        qi->free_head = (qi->free_head + 2) % QI_LENGTH;
 798        qi->free_cnt -= 2;
 799
 800        /*
 801         * update the HW tail register indicating the presence of
 802         * new descriptors.
 803         */
 804        writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG);
 805
 806        while (qi->desc_status[wait_index] != QI_DONE) {
  807                /*
  808                 * We leave interrupts disabled to prevent the interrupt
  809                 * context from queueing another cmd while a cmd is already
  810                 * submitted and waiting for completion on this CPU. This
  811                 * avoids a deadlock where the interrupt context could wait
  812                 * indefinitely for free slots in the queue.
  813                 */
 814                rc = qi_check_fault(iommu, index);
 815                if (rc)
 816                        break;
 817
 818                raw_spin_unlock(&qi->q_lock);
 819                cpu_relax();
 820                raw_spin_lock(&qi->q_lock);
 821        }
 822
 823        qi->desc_status[index] = QI_DONE;
 824
 825        reclaim_free_desc(qi);
 826        raw_spin_unlock_irqrestore(&qi->q_lock, flags);
 827
 828        if (rc == -EAGAIN)
 829                goto restart;
 830
 831        return rc;
 832}
 833
 834/*
 835 * Flush the global interrupt entry cache.
 836 */
 837void qi_global_iec(struct intel_iommu *iommu)
 838{
 839        struct qi_desc desc;
 840
 841        desc.low = QI_IEC_TYPE;
 842        desc.high = 0;
 843
 844        /* should never fail */
 845        qi_submit_sync(&desc, iommu);
 846}
 847
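     /*
      * Queue a context-cache invalidation with the given domain-id,
      * source-id, function mask and granularity.
      */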
 848void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
 849                      u64 type)
 850{
 851        struct qi_desc desc;
 852
 853        desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
 854                        | QI_CC_GRAN(type) | QI_CC_TYPE;
 855        desc.high = 0;
 856
 857        qi_submit_sync(&desc, iommu);
 858}
 859
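     /*
      * Queue an IOTLB invalidation for domain @did covering 2^size_order
      * pages at @addr; read/write draining is requested when the hardware
      * advertises the corresponding capabilities.
      */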
 860void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
 861                    unsigned int size_order, u64 type)
 862{
 863        u8 dw = 0, dr = 0;
 864
 865        struct qi_desc desc;
 866        int ih = 0;
 867
 868        if (cap_write_drain(iommu->cap))
 869                dw = 1;
 870
 871        if (cap_read_drain(iommu->cap))
 872                dr = 1;
 873
 874        desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
 875                | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
 876        desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
 877                | QI_IOTLB_AM(size_order);
 878
 879        qi_submit_sync(&desc, iommu);
 880}
 881
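     /*
      * Queue a device-IOTLB (ATS) invalidation for the device @sid with
      * invalidation queue depth @qdep; a non-zero @mask encodes a 2^mask
      * page range in the low bits of the address.
      */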
 882void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
 883                        u64 addr, unsigned mask)
 884{
 885        struct qi_desc desc;
 886
 887        if (mask) {
 888                BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1));
 889                addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1;
 890                desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
 891        } else
 892                desc.high = QI_DEV_IOTLB_ADDR(addr);
 893
 894        if (qdep >= QI_DEV_IOTLB_MAX_INVS)
 895                qdep = 0;
 896
 897        desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
 898                   QI_DIOTLB_TYPE;
 899
 900        qi_submit_sync(&desc, iommu);
 901}
 902
 903/*
 904 * Disable Queued Invalidation interface.
 905 */
 906void dmar_disable_qi(struct intel_iommu *iommu)
 907{
 908        unsigned long flags;
 909        u32 sts;
 910        cycles_t start_time = get_cycles();
 911
 912        if (!ecap_qis(iommu->ecap))
 913                return;
 914
 915        raw_spin_lock_irqsave(&iommu->register_lock, flags);
 916
 917        sts =  dmar_readq(iommu->reg + DMAR_GSTS_REG);
 918        if (!(sts & DMA_GSTS_QIES))
 919                goto end;
 920
  921        /*
  922         * Give the hardware a chance to complete pending invalidation requests.
  923         */
 924        while ((readl(iommu->reg + DMAR_IQT_REG) !=
 925                readl(iommu->reg + DMAR_IQH_REG)) &&
 926                (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
 927                cpu_relax();
 928
 929        iommu->gcmd &= ~DMA_GCMD_QIE;
 930        writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
 931
 932        IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
 933                      !(sts & DMA_GSTS_QIES), sts);
 934end:
 935        raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
 936}
 937
 938/*
 939 * Enable queued invalidation.
 940 */
 941static void __dmar_enable_qi(struct intel_iommu *iommu)
 942{
 943        u32 sts;
 944        unsigned long flags;
 945        struct q_inval *qi = iommu->qi;
 946
 947        qi->free_head = qi->free_tail = 0;
 948        qi->free_cnt = QI_LENGTH;
 949
 950        raw_spin_lock_irqsave(&iommu->register_lock, flags);
 951
 952        /* write zero to the tail reg */
 953        writel(0, iommu->reg + DMAR_IQT_REG);
 954
 955        dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
 956
 957        iommu->gcmd |= DMA_GCMD_QIE;
 958        writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
 959
  960        /* Make sure the hardware completes it */
 961        IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
 962
 963        raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
 964}
 965
 966/*
 967 * Enable Queued Invalidation interface. This is a must to support
 968 * interrupt-remapping. Also used by DMA-remapping, which replaces
 969 * register based IOTLB invalidation.
 970 */
 971int dmar_enable_qi(struct intel_iommu *iommu)
 972{
 973        struct q_inval *qi;
 974        struct page *desc_page;
 975
 976        if (!ecap_qis(iommu->ecap))
 977                return -ENOENT;
 978
  979        /*
  980         * Queued invalidation is already set up and enabled.
  981         */
 982        if (iommu->qi)
 983                return 0;
 984
 985        iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
 986        if (!iommu->qi)
 987                return -ENOMEM;
 988
 989        qi = iommu->qi;
 990
 991
 992        desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, 0);
 993        if (!desc_page) {
 994                kfree(qi);
  995                iommu->qi = NULL;
 996                return -ENOMEM;
 997        }
 998
 999        qi->desc = page_address(desc_page);
1000
 1001        qi->desc_status = kzalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC);
1002        if (!qi->desc_status) {
1003                free_page((unsigned long) qi->desc);
1004                kfree(qi);
 1005                iommu->qi = NULL;
1006                return -ENOMEM;
1007        }
1008
1009        qi->free_head = qi->free_tail = 0;
1010        qi->free_cnt = QI_LENGTH;
1011
1012        raw_spin_lock_init(&qi->q_lock);
1013
1014        __dmar_enable_qi(iommu);
1015
1016        return 0;
1017}
1018
 1019/* iommu interrupt handling. Most of it is MSI-like. */
1020
1021enum faulttype {
1022        DMA_REMAP,
1023        INTR_REMAP,
1024        UNKNOWN,
1025};
1026
1027static const char *dma_remap_fault_reasons[] =
1028{
1029        "Software",
1030        "Present bit in root entry is clear",
1031        "Present bit in context entry is clear",
1032        "Invalid context entry",
1033        "Access beyond MGAW",
1034        "PTE Write access is not set",
1035        "PTE Read access is not set",
1036        "Next page table ptr is invalid",
1037        "Root table address invalid",
1038        "Context table ptr is invalid",
1039        "non-zero reserved fields in RTP",
1040        "non-zero reserved fields in CTP",
1041        "non-zero reserved fields in PTE",
1042};
1043
1044static const char *intr_remap_fault_reasons[] =
1045{
1046        "Detected reserved fields in the decoded interrupt-remapped request",
1047        "Interrupt index exceeded the interrupt-remapping table size",
1048        "Present field in the IRTE entry is clear",
1049        "Error accessing interrupt-remapping table pointed by IRTA_REG",
1050        "Detected reserved fields in the IRTE entry",
1051        "Blocked a compatibility format interrupt request",
1052        "Blocked an interrupt request due to source-id verification failure",
1053};
1054
1055#define MAX_FAULT_REASON_IDX    (ARRAY_SIZE(fault_reason_strings) - 1)
1056
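     /*
      * Map a hardware fault reason code to a human-readable string; codes
      * 0x20 and above are interrupt-remapping faults, lower codes are
      * DMA-remapping faults.
      */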
1057const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1058{
 1059        if (fault_reason >= 0x20 && (fault_reason < 0x20 +
1060                                     ARRAY_SIZE(intr_remap_fault_reasons))) {
1061                *fault_type = INTR_REMAP;
1062                return intr_remap_fault_reasons[fault_reason - 0x20];
1063        } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1064                *fault_type = DMA_REMAP;
1065                return dma_remap_fault_reasons[fault_reason];
1066        } else {
1067                *fault_type = UNKNOWN;
1068                return "Unknown";
1069        }
1070}
1071
1072void dmar_msi_unmask(struct irq_data *data)
1073{
1074        struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1075        unsigned long flag;
1076
1077        /* unmask it */
1078        raw_spin_lock_irqsave(&iommu->register_lock, flag);
1079        writel(0, iommu->reg + DMAR_FECTL_REG);
1080        /* Read a reg to force flush the post write */
1081        readl(iommu->reg + DMAR_FECTL_REG);
1082        raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1083}
1084
1085void dmar_msi_mask(struct irq_data *data)
1086{
1087        unsigned long flag;
1088        struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1089
1090        /* mask it */
1091        raw_spin_lock_irqsave(&iommu->register_lock, flag);
1092        writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
1093        /* Read a reg to force flush the post write */
1094        readl(iommu->reg + DMAR_FECTL_REG);
1095        raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1096}
1097
1098void dmar_msi_write(int irq, struct msi_msg *msg)
1099{
1100        struct intel_iommu *iommu = irq_get_handler_data(irq);
1101        unsigned long flag;
1102
1103        raw_spin_lock_irqsave(&iommu->register_lock, flag);
1104        writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
1105        writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
1106        writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
1107        raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1108}
1109
1110void dmar_msi_read(int irq, struct msi_msg *msg)
1111{
1112        struct intel_iommu *iommu = irq_get_handler_data(irq);
1113        unsigned long flag;
1114
1115        raw_spin_lock_irqsave(&iommu->register_lock, flag);
1116        msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
1117        msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
1118        msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
1119        raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1120}
1121
1122static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1123                u8 fault_reason, u16 source_id, unsigned long long addr)
1124{
1125        const char *reason;
1126        int fault_type;
1127
1128        reason = dmar_get_fault_reason(fault_reason, &fault_type);
1129
1130        if (fault_type == INTR_REMAP)
 1131                printk(KERN_ERR "INTR-REMAP: Request device [%02x:%02x.%d] "
1132                       "fault index %llx\n"
1133                        "INTR-REMAP:[fault reason %02d] %s\n",
1134                        (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1135                        PCI_FUNC(source_id & 0xFF), addr >> 48,
1136                        fault_reason, reason);
1137        else
1138                printk(KERN_ERR
1139                       "DMAR:[%s] Request device [%02x:%02x.%d] "
 1140                       "fault addr %llx\n"
1141                       "DMAR:[fault reason %02d] %s\n",
1142                       (type ? "DMA Read" : "DMA Write"),
1143                       (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1144                       PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1145        return 0;
1146}
1147
1148#define PRIMARY_FAULT_REG_LEN (16)
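     /*
      * Fault interrupt handler: walk the primary fault recording registers,
      * log and clear each pending fault, then clear the summary bits in the
      * fault status register.
      */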
1149irqreturn_t dmar_fault(int irq, void *dev_id)
1150{
1151        struct intel_iommu *iommu = dev_id;
1152        int reg, fault_index;
1153        u32 fault_status;
1154        unsigned long flag;
1155
1156        raw_spin_lock_irqsave(&iommu->register_lock, flag);
1157        fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1158        if (fault_status)
1159                printk(KERN_ERR "DRHD: handling fault status reg %x\n",
1160                       fault_status);
1161
1162        /* TBD: ignore advanced fault log currently */
1163        if (!(fault_status & DMA_FSTS_PPF))
1164                goto clear_rest;
1165
1166        fault_index = dma_fsts_fault_record_index(fault_status);
1167        reg = cap_fault_reg_offset(iommu->cap);
1168        while (1) {
1169                u8 fault_reason;
1170                u16 source_id;
1171                u64 guest_addr;
1172                int type;
1173                u32 data;
1174
1175                /* highest 32 bits */
1176                data = readl(iommu->reg + reg +
1177                                fault_index * PRIMARY_FAULT_REG_LEN + 12);
1178                if (!(data & DMA_FRCD_F))
1179                        break;
1180
1181                fault_reason = dma_frcd_fault_reason(data);
1182                type = dma_frcd_type(data);
1183
1184                data = readl(iommu->reg + reg +
1185                                fault_index * PRIMARY_FAULT_REG_LEN + 8);
1186                source_id = dma_frcd_source_id(data);
1187
1188                guest_addr = dmar_readq(iommu->reg + reg +
1189                                fault_index * PRIMARY_FAULT_REG_LEN);
1190                guest_addr = dma_frcd_page_addr(guest_addr);
1191                /* clear the fault */
1192                writel(DMA_FRCD_F, iommu->reg + reg +
1193                        fault_index * PRIMARY_FAULT_REG_LEN + 12);
1194
1195                raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1196
1197                dmar_fault_do_one(iommu, type, fault_reason,
1198                                source_id, guest_addr);
1199
1200                fault_index++;
1201                if (fault_index >= cap_num_fault_regs(iommu->cap))
1202                        fault_index = 0;
1203                raw_spin_lock_irqsave(&iommu->register_lock, flag);
1204        }
1205clear_rest:
1206        /* clear all the other faults */
1207        fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1208        writel(fault_status, iommu->reg + DMAR_FSTS_REG);
1209
1210        raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1211        return IRQ_HANDLED;
1212}
1213
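     /*
      * Allocate an interrupt for fault reporting, program the IOMMU's
      * fault-event MSI registers via arch_setup_dmar_msi() and install
      * dmar_fault() as the handler.
      */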
1214int dmar_set_interrupt(struct intel_iommu *iommu)
1215{
1216        int irq, ret;
1217
1218        /*
1219         * Check if the fault interrupt is already initialized.
1220         */
1221        if (iommu->irq)
1222                return 0;
1223
1224        irq = create_irq();
1225        if (!irq) {
1226                printk(KERN_ERR "IOMMU: no free vectors\n");
1227                return -EINVAL;
1228        }
1229
1230        irq_set_handler_data(irq, iommu);
1231        iommu->irq = irq;
1232
1233        ret = arch_setup_dmar_msi(irq);
1234        if (ret) {
1235                irq_set_handler_data(irq, NULL);
1236                iommu->irq = 0;
1237                destroy_irq(irq);
1238                return ret;
1239        }
1240
1241        ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
1242        if (ret)
1243                printk(KERN_ERR "IOMMU: can't request irq\n");
1244        return ret;
1245}
1246
1247int __init enable_drhd_fault_handling(void)
1248{
1249        struct dmar_drhd_unit *drhd;
1250
1251        /*
1252         * Enable fault control interrupt.
1253         */
1254        for_each_drhd_unit(drhd) {
1255                int ret;
1256                struct intel_iommu *iommu = drhd->iommu;
1257                ret = dmar_set_interrupt(iommu);
1258
1259                if (ret) {
 1260                        printk(KERN_ERR "DRHD %Lx: failed to enable fault "
 1261                               "interrupt, ret %d\n",
1262                               (unsigned long long)drhd->reg_base_addr, ret);
1263                        return -1;
1264                }
1265
1266                /*
1267                 * Clear any previous faults.
1268                 */
1269                dmar_fault(iommu->irq, iommu);
1270        }
1271
1272        return 0;
1273}
1274
1275/*
1276 * Re-enable Queued Invalidation interface.
1277 */
1278int dmar_reenable_qi(struct intel_iommu *iommu)
1279{
1280        if (!ecap_qis(iommu->ecap))
1281                return -ENOENT;
1282
1283        if (!iommu->qi)
1284                return -ENOENT;
1285
1286        /*
1287         * First disable queued invalidation.
1288         */
1289        dmar_disable_qi(iommu);
1290        /*
1291         * Then enable queued invalidation again. Since there is no pending
1292         * invalidation requests now, it's safe to re-enable queued
1293         * invalidation.
1294         */
1295        __dmar_enable_qi(iommu);
1296
1297        return 0;
1298}
1299
1300/*
1301 * Check interrupt remapping support in DMAR table description.
1302 */
1303int __init dmar_ir_support(void)
1304{
1305        struct acpi_table_dmar *dmar;
1306        dmar = (struct acpi_table_dmar *)dmar_tbl;
1307        if (!dmar)
1308                return 0;
1309        return dmar->flags & 0x1;
1310}
1311IOMMU_INIT_POST(detect_intel_iommu);
1312