linux/drivers/xen/swiotlb-xen.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 *  Copyright 2010
 *  by Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
 *
 * This code provides an IOMMU for Xen PV guests with PCI passthrough.
 *
 * PV guests under Xen run in a non-contiguous memory architecture.
 *
 * When PCI pass-through is utilized, this necessitates an IOMMU for
 * translating bus (DMA) addresses to virtual addresses and vice versa,
 * and also a mechanism to obtain contiguous pages for device driver
 * operations (say DMA operations).
 *
 * Specifically, under Xen the Linux idea of pages is an illusion. It
 * assumes that pages start at zero and go up to the available memory. To
 * help with that, the Linux Xen MMU provides a lookup mechanism to
 * translate the page frame numbers (PFN) to machine frame numbers (MFN)
 * and vice versa. The MFNs are the "real" frame numbers. Furthermore,
 * memory is not contiguous: the Xen hypervisor stitches memory for guests
 * from different pools, which means there is no guarantee that PFN==MFN
 * and PFN+1==MFN+1. Lastly, with Xen 4.0, pages (in debug mode) are
 * allocated in descending order (high to low), meaning the guest might
 * never get any MFNs under the 4GB mark.
 */

#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt

#include <linux/memblock.h>
#include <linux/dma-direct.h>
#include <linux/dma-map-ops.h>
#include <linux/export.h>
#include <xen/swiotlb-xen.h>
#include <xen/page.h>
#include <xen/xen-ops.h>
#include <xen/hvc-console.h>

#include <asm/dma-mapping.h>
#include <asm/xen/page-coherent.h>

#include <trace/events/swiotlb.h>
#define MAX_DMA_BITS 32

/*
 * Helpers to translate between Xen pseudo-physical (PFN-based), machine
 * (BFN-based) bus addresses and device DMA addresses.
 */

static inline phys_addr_t xen_phys_to_bus(struct device *dev, phys_addr_t paddr)
{
        unsigned long bfn = pfn_to_bfn(XEN_PFN_DOWN(paddr));
        phys_addr_t baddr = (phys_addr_t)bfn << XEN_PAGE_SHIFT;

        baddr |= paddr & ~XEN_PAGE_MASK;
        return baddr;
}

static inline dma_addr_t xen_phys_to_dma(struct device *dev, phys_addr_t paddr)
{
        return phys_to_dma(dev, xen_phys_to_bus(dev, paddr));
}

static inline phys_addr_t xen_bus_to_phys(struct device *dev,
                                          phys_addr_t baddr)
{
        unsigned long xen_pfn = bfn_to_pfn(XEN_PFN_DOWN(baddr));
        phys_addr_t paddr = (xen_pfn << XEN_PAGE_SHIFT) |
                            (baddr & ~XEN_PAGE_MASK);

        return paddr;
}

static inline phys_addr_t xen_dma_to_phys(struct device *dev,
                                          dma_addr_t dma_addr)
{
        return xen_bus_to_phys(dev, dma_to_phys(dev, dma_addr));
}

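/*
 * Return 1 if the machine frames backing [p, p + size) are not contiguous,
 * i.e. the range cannot be handed to a device as a single DMA region.
 */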
static inline int range_straddles_page_boundary(phys_addr_t p, size_t size)
{
        unsigned long next_bfn, xen_pfn = XEN_PFN_DOWN(p);
        unsigned int i, nr_pages = XEN_PFN_UP(xen_offset_in_page(p) + size);

        next_bfn = pfn_to_bfn(xen_pfn);

        for (i = 1; i < nr_pages; i++)
                if (pfn_to_bfn(++xen_pfn) != ++next_bfn)
                        return 1;

        return 0;
}

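/*
 * Check whether a DMA address lies within this domain's swiotlb bounce
 * buffer; addresses backed by foreign (non-local) frames never do.
 */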
static int is_xen_swiotlb_buffer(struct device *dev, dma_addr_t dma_addr)
{
        unsigned long bfn = XEN_PFN_DOWN(dma_to_phys(dev, dma_addr));
        unsigned long xen_pfn = bfn_to_local_pfn(bfn);
        phys_addr_t paddr = (phys_addr_t)xen_pfn << XEN_PAGE_SHIFT;

        /* If the address is outside our domain, it CAN
         * have the same virtual address as another address
         * in our domain. Therefore _only_ check address within our domain.
         */
        if (pfn_valid(PFN_DOWN(paddr)))
                return is_swiotlb_buffer(dev, paddr);
        return 0;
}

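/*
 * Replace the pages backing the swiotlb buffer, IO_TLB_SEGSIZE slabs at a
 * time, with machine-contiguous memory that is addressable within dma_bits,
 * widening the mask up to MAX_DMA_BITS before giving up.
 */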
static int xen_swiotlb_fixup(void *buf, unsigned long nslabs)
{
        int rc;
        unsigned int order = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT);
        unsigned int i, dma_bits = order + PAGE_SHIFT;
        dma_addr_t dma_handle;
        phys_addr_t p = virt_to_phys(buf);

        BUILD_BUG_ON(IO_TLB_SEGSIZE & (IO_TLB_SEGSIZE - 1));
        BUG_ON(nslabs % IO_TLB_SEGSIZE);

        i = 0;
        do {
                do {
                        rc = xen_create_contiguous_region(
                                p + (i << IO_TLB_SHIFT), order,
                                dma_bits, &dma_handle);
                } while (rc && dma_bits++ < MAX_DMA_BITS);
                if (rc)
                        return rc;

                i += IO_TLB_SEGSIZE;
        } while (i < nslabs);
        return 0;
}

enum xen_swiotlb_err {
        XEN_SWIOTLB_UNKNOWN = 0,
        XEN_SWIOTLB_ENOMEM,
        XEN_SWIOTLB_EFIXUP
};

static const char *xen_swiotlb_error(enum xen_swiotlb_err err)
{
        switch (err) {
        case XEN_SWIOTLB_ENOMEM:
                return "Cannot allocate Xen-SWIOTLB buffer\n";
        case XEN_SWIOTLB_EFIXUP:
                return "Failed to get contiguous memory for DMA from Xen!\n"\
                    "You either: don't have the permissions, do not have"\
                    " enough free memory under 4GB, or the hypervisor memory"\
                    " is too fragmented!";
        default:
                break;
        }
        return "";
}

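/*
 * Late (post-boot) initialization: allocate the IO TLB from the page
 * allocator, exchange it for memory below 4GB via xen_swiotlb_fixup() and
 * register it with the core swiotlb code.  On failure the size is halved
 * and retried, down to a 2MB minimum.
 */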
int xen_swiotlb_init(void)
{
        enum xen_swiotlb_err m_ret = XEN_SWIOTLB_UNKNOWN;
        unsigned long bytes = swiotlb_size_or_default();
        unsigned long nslabs = bytes >> IO_TLB_SHIFT;
        unsigned int order, repeat = 3;
        int rc = -ENOMEM;
        char *start;

        if (io_tlb_default_mem.nslabs) {
                pr_warn("swiotlb buffer already initialized\n");
                return -EEXIST;
        }

retry:
        m_ret = XEN_SWIOTLB_ENOMEM;
        order = get_order(bytes);

        /*
         * Get IO TLB memory from any location.
         */
#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
        while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
                start = (void *)xen_get_swiotlb_free_pages(order);
                if (start)
                        break;
                order--;
        }
        if (!start)
                goto exit;
        if (order != get_order(bytes)) {
                pr_warn("Warning: only able to allocate %ld MB for software IO TLB\n",
                        (PAGE_SIZE << order) >> 20);
                nslabs = SLABS_PER_PAGE << order;
                bytes = nslabs << IO_TLB_SHIFT;
        }

        /*
         * And replace that memory with pages under 4GB.
         */
        rc = xen_swiotlb_fixup(start, nslabs);
        if (rc) {
                free_pages((unsigned long)start, order);
                m_ret = XEN_SWIOTLB_EFIXUP;
                goto error;
        }
        rc = swiotlb_late_init_with_tbl(start, nslabs);
        if (rc)
                return rc;
        swiotlb_set_max_segment(PAGE_SIZE);
        return 0;
error:
        if (nslabs > 1024 && repeat--) {
                /* Min is 2MB */
                nslabs = max(1024UL, ALIGN(nslabs >> 1, IO_TLB_SEGSIZE));
                bytes = nslabs << IO_TLB_SHIFT;
                pr_info("Lowering to %luMB\n", bytes >> 20);
                goto retry;
        }
exit:
        pr_err("%s (rc:%d)\n", xen_swiotlb_error(m_ret), rc);
        return rc;
}

#ifdef CONFIG_X86
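/*
 * Boot-time variant of the above: the buffer comes from memblock and there
 * is no fallback, so any failure after shrinking to the 2MB minimum panics.
 */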
void __init xen_swiotlb_init_early(void)
{
        unsigned long bytes = swiotlb_size_or_default();
        unsigned long nslabs = bytes >> IO_TLB_SHIFT;
        unsigned int repeat = 3;
        char *start;
        int rc;

retry:
        /*
         * Get IO TLB memory from any location.
         */
        start = memblock_alloc(PAGE_ALIGN(bytes),
                               IO_TLB_SEGSIZE << IO_TLB_SHIFT);
        if (!start)
                panic("%s: Failed to allocate %lu bytes\n",
                      __func__, PAGE_ALIGN(bytes));

        /*
         * And replace that memory with pages under 4GB.
         */
        rc = xen_swiotlb_fixup(start, nslabs);
        if (rc) {
                memblock_free(__pa(start), PAGE_ALIGN(bytes));
                if (nslabs > 1024 && repeat--) {
                        /* Min is 2MB */
                        nslabs = max(1024UL, ALIGN(nslabs >> 1, IO_TLB_SEGSIZE));
                        bytes = nslabs << IO_TLB_SHIFT;
                        pr_info("Lowering to %luMB\n", bytes >> 20);
                        goto retry;
                }
                panic("%s (rc:%d)", xen_swiotlb_error(XEN_SWIOTLB_EFIXUP), rc);
        }

        if (swiotlb_init_with_tbl(start, nslabs, true))
                panic("Cannot allocate SWIOTLB buffer");
        swiotlb_set_max_segment(PAGE_SIZE);
}
#endif /* CONFIG_X86 */

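/*
 * Allocate coherent memory and make sure the backing machine frames are
 * contiguous and within the device's coherent DMA mask, exchanging the
 * pages with Xen if necessary.  Exchanged allocations are flagged
 * PageXenRemapped so the free path can undo the exchange.
 */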
static void *
xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
                           dma_addr_t *dma_handle, gfp_t flags,
                           unsigned long attrs)
{
        void *ret;
        int order = get_order(size);
        u64 dma_mask = DMA_BIT_MASK(32);
        phys_addr_t phys;
        dma_addr_t dev_addr;

        /*
         * Ignore region specifiers - the kernel's idea of the
         * pseudo-phys memory layout has nothing to do with the
         * machine physical layout.  We can't allocate highmem
         * because we can't return a pointer to it.
         */
        flags &= ~(__GFP_DMA | __GFP_HIGHMEM);

        /* Convert the size to actually allocated. */
        size = 1UL << (order + XEN_PAGE_SHIFT);

        /*
         * On ARM this function returns an ioremap'ped virtual address for
         * which virt_to_phys doesn't return the corresponding physical
         * address. In fact on ARM virt_to_phys only works for kernel direct
         * mapped RAM memory. Also see comment below.
         */
        ret = xen_alloc_coherent_pages(hwdev, size, dma_handle, flags, attrs);

        if (!ret)
                return ret;

        if (hwdev && hwdev->coherent_dma_mask)
                dma_mask = hwdev->coherent_dma_mask;

        /*
         * At this point dma_handle is the dma address, next we are
         * going to set it to the machine address.
         * Do not use virt_to_phys(ret) because on ARM it doesn't correspond
         * to *dma_handle.
         */
        phys = dma_to_phys(hwdev, *dma_handle);
        dev_addr = xen_phys_to_dma(hwdev, phys);
        if ((dev_addr + size - 1 <= dma_mask) &&
            !range_straddles_page_boundary(phys, size))
                *dma_handle = dev_addr;
        else {
                if (xen_create_contiguous_region(phys, order,
                                                 fls64(dma_mask), dma_handle) != 0) {
                        xen_free_coherent_pages(hwdev, size, ret, (dma_addr_t)phys, attrs);
                        return NULL;
                }
                *dma_handle = phys_to_dma(hwdev, *dma_handle);
                SetPageXenRemapped(virt_to_page(ret));
        }
        memset(ret, 0, size);
        return ret;
}

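/*
 * Free a coherent allocation, undoing the contiguous-region exchange first
 * if the pages were remapped by the allocation path above.
 */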
static void
xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
                          dma_addr_t dev_addr, unsigned long attrs)
{
        int order = get_order(size);
        phys_addr_t phys;
        u64 dma_mask = DMA_BIT_MASK(32);
        struct page *page;

        if (hwdev && hwdev->coherent_dma_mask)
                dma_mask = hwdev->coherent_dma_mask;

        /*
         * Do not use virt_to_phys because on ARM it doesn't return the
         * physical address.
         */
        phys = xen_dma_to_phys(hwdev, dev_addr);

        /* Convert the size to actually allocated. */
        size = 1UL << (order + XEN_PAGE_SHIFT);

        if (is_vmalloc_addr(vaddr))
                page = vmalloc_to_page(vaddr);
        else
                page = virt_to_page(vaddr);

        if (!WARN_ON((dev_addr + size - 1 > dma_mask) ||
                     range_straddles_page_boundary(phys, size)) &&
            TestClearPageXenRemapped(page))
                xen_destroy_contiguous_region(phys, order);

        xen_free_coherent_pages(hwdev, size, vaddr, phys_to_dma(hwdev, phys),
                                attrs);
}

/*
 * Map a single buffer of the indicated size for DMA in streaming mode.  The
 * DMA address to use is returned.
 *
 * Once the device is given the dma address, the device owns this memory until
 * either xen_swiotlb_unmap_page or xen_swiotlb_sync_single_for_cpu is
 * performed.
 */
static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
                                unsigned long offset, size_t size,
                                enum dma_data_direction dir,
                                unsigned long attrs)
{
        phys_addr_t map, phys = page_to_phys(page) + offset;
        dma_addr_t dev_addr = xen_phys_to_dma(dev, phys);

        BUG_ON(dir == DMA_NONE);
        /*
         * If the address happens to be in the device's DMA window,
         * we can safely return the device addr and not worry about bounce
         * buffering it.
         */
        if (dma_capable(dev, dev_addr, size, true) &&
            !range_straddles_page_boundary(phys, size) &&
            !xen_arch_need_swiotlb(dev, phys, dev_addr) &&
            !is_swiotlb_force_bounce(dev))
                goto done;

        /*
         * Oh well, have to allocate and map a bounce buffer.
         */
        trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force);

        map = swiotlb_tbl_map_single(dev, phys, size, size, dir, attrs);
        if (map == (phys_addr_t)DMA_MAPPING_ERROR)
                return DMA_MAPPING_ERROR;

        phys = map;
        dev_addr = xen_phys_to_dma(dev, map);

        /*
         * Ensure that the address returned is DMA'ble
         */
        if (unlikely(!dma_capable(dev, dev_addr, size, true))) {
                swiotlb_tbl_unmap_single(dev, map, size, dir,
                                attrs | DMA_ATTR_SKIP_CPU_SYNC);
                return DMA_MAPPING_ERROR;
        }

done:
        if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
                if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dev_addr))))
                        arch_sync_dma_for_device(phys, size, dir);
                else
                        xen_dma_sync_for_device(dev, dev_addr, size, dir);
        }
        return dev_addr;
}

/*
 * Unmap a single streaming mode DMA translation.  The dma_addr and size must
 * match what was provided in a previous xen_swiotlb_map_page call.  All
 * other usages are undefined.
 *
 * After this call, reads by the cpu to the buffer are guaranteed to see
 * whatever the device wrote there.
 */
static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
                size_t size, enum dma_data_direction dir, unsigned long attrs)
{
        phys_addr_t paddr = xen_dma_to_phys(hwdev, dev_addr);

        BUG_ON(dir == DMA_NONE);

        if (!dev_is_dma_coherent(hwdev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
                if (pfn_valid(PFN_DOWN(dma_to_phys(hwdev, dev_addr))))
                        arch_sync_dma_for_cpu(paddr, size, dir);
                else
                        xen_dma_sync_for_cpu(hwdev, dev_addr, size, dir);
        }

        /* NOTE: We use dev_addr here, not paddr! */
        if (is_xen_swiotlb_buffer(hwdev, dev_addr))
                swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs);
}

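/*
 * Make a streaming mapping visible to the CPU: do any architecture cache
 * maintenance and, if the buffer was bounced, copy the data back out of the
 * swiotlb slot.
 */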
static void
xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr,
                size_t size, enum dma_data_direction dir)
{
        phys_addr_t paddr = xen_dma_to_phys(dev, dma_addr);

        if (!dev_is_dma_coherent(dev)) {
                if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr))))
                        arch_sync_dma_for_cpu(paddr, size, dir);
                else
                        xen_dma_sync_for_cpu(dev, dma_addr, size, dir);
        }

        if (is_xen_swiotlb_buffer(dev, dma_addr))
                swiotlb_sync_single_for_cpu(dev, paddr, size, dir);
}

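/*
 * Hand a streaming mapping back to the device: copy into the swiotlb slot
 * if the buffer was bounced, then do the cache maintenance for the device.
 */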
static void
xen_swiotlb_sync_single_for_device(struct device *dev, dma_addr_t dma_addr,
                size_t size, enum dma_data_direction dir)
{
        phys_addr_t paddr = xen_dma_to_phys(dev, dma_addr);

        if (is_xen_swiotlb_buffer(dev, dma_addr))
                swiotlb_sync_single_for_device(dev, paddr, size, dir);

        if (!dev_is_dma_coherent(dev)) {
                if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr))))
                        arch_sync_dma_for_device(paddr, size, dir);
                else
                        xen_dma_sync_for_device(dev, dma_addr, size, dir);
        }
}

/*
 * Unmap a set of streaming mode DMA translations.  Again, cpu read rules
 * concerning calls here are the same as for xen_swiotlb_unmap_page() above.
 */
static void
xen_swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
                enum dma_data_direction dir, unsigned long attrs)
{
        struct scatterlist *sg;
        int i;

        BUG_ON(dir == DMA_NONE);

        for_each_sg(sgl, sg, nelems, i)
                xen_swiotlb_unmap_page(hwdev, sg->dma_address, sg_dma_len(sg),
                                dir, attrs);
}

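/*
 * Map each scatterlist entry individually with xen_swiotlb_map_page(); on
 * failure everything mapped so far is unwound and -EIO is returned.
 */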
static int
xen_swiotlb_map_sg(struct device *dev, struct scatterlist *sgl, int nelems,
                enum dma_data_direction dir, unsigned long attrs)
{
        struct scatterlist *sg;
        int i;

        BUG_ON(dir == DMA_NONE);

        for_each_sg(sgl, sg, nelems, i) {
                sg->dma_address = xen_swiotlb_map_page(dev, sg_page(sg),
                                sg->offset, sg->length, dir, attrs);
                if (sg->dma_address == DMA_MAPPING_ERROR)
                        goto out_unmap;
                sg_dma_len(sg) = sg->length;
        }

        return nelems;
out_unmap:
        xen_swiotlb_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
        sg_dma_len(sgl) = 0;
        return -EIO;
}

static void
xen_swiotlb_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl,
                            int nelems, enum dma_data_direction dir)
{
        struct scatterlist *sg;
        int i;

        for_each_sg(sgl, sg, nelems, i) {
                xen_swiotlb_sync_single_for_cpu(dev, sg->dma_address,
                                sg->length, dir);
        }
}

static void
xen_swiotlb_sync_sg_for_device(struct device *dev, struct scatterlist *sgl,
                               int nelems, enum dma_data_direction dir)
{
        struct scatterlist *sg;
        int i;

        for_each_sg(sgl, sg, nelems, i) {
                xen_swiotlb_sync_single_for_device(dev, sg->dma_address,
                                sg->length, dir);
        }
}

/*
 * Return whether the given device DMA address mask can be supported
 * properly.  For example, if your device can only drive the low 24-bits
 * during bus mastering, then you would pass 0x00ffffff as the mask to
 * this function.
 */
static int
xen_swiotlb_dma_supported(struct device *hwdev, u64 mask)
{
        return xen_phys_to_dma(hwdev, io_tlb_default_mem.end - 1) <= mask;
}

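/*
 * The dma_map_ops table used when swiotlb-xen handles a device's DMA.
 */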
const struct dma_map_ops xen_swiotlb_dma_ops = {
        .alloc = xen_swiotlb_alloc_coherent,
        .free = xen_swiotlb_free_coherent,
        .sync_single_for_cpu = xen_swiotlb_sync_single_for_cpu,
        .sync_single_for_device = xen_swiotlb_sync_single_for_device,
        .sync_sg_for_cpu = xen_swiotlb_sync_sg_for_cpu,
        .sync_sg_for_device = xen_swiotlb_sync_sg_for_device,
        .map_sg = xen_swiotlb_map_sg,
        .unmap_sg = xen_swiotlb_unmap_sg,
        .map_page = xen_swiotlb_map_page,
        .unmap_page = xen_swiotlb_unmap_page,
        .dma_supported = xen_swiotlb_dma_supported,
        .mmap = dma_common_mmap,
        .get_sgtable = dma_common_get_sgtable,
        .alloc_pages = dma_common_alloc_pages,
        .free_pages = dma_common_free_pages,
};