linux/arch/s390/pci/pci_dma.c
/*
 * Copyright IBM Corp. 2012
 *
 * Author(s):
 *   Jan Glauber <jang@linux.vnet.ibm.com>
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/iommu-helper.h>
#include <linux/dma-mapping.h>
#include <linux/vmalloc.h>
#include <linux/pci.h>
#include <asm/pci_dma.h>

static struct kmem_cache *dma_region_table_cache;
static struct kmem_cache *dma_page_table_cache;
static int s390_iommu_strict;

static int zpci_refresh_global(struct zpci_dev *zdev)
{
        return zpci_refresh_trans((u64) zdev->fh << 32, zdev->start_dma,
                                  zdev->iommu_pages * PAGE_SIZE);
}

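/*
 * Allocate a region/segment table for the CPU-side translation tables
 * and mark all of its entries invalid.
 */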
unsigned long *dma_alloc_cpu_table(void)
{
        unsigned long *table, *entry;

        table = kmem_cache_alloc(dma_region_table_cache, GFP_ATOMIC);
        if (!table)
                return NULL;

        for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++)
                *entry = ZPCI_TABLE_INVALID;
        return table;
}

static void dma_free_cpu_table(void *table)
{
        kmem_cache_free(dma_region_table_cache, table);
}

static unsigned long *dma_alloc_page_table(void)
{
        unsigned long *table, *entry;

        table = kmem_cache_alloc(dma_page_table_cache, GFP_ATOMIC);
        if (!table)
                return NULL;

        for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++)
                *entry = ZPCI_PTE_INVALID;
        return table;
}

static void dma_free_page_table(void *table)
{
        kmem_cache_free(dma_page_table_cache, table);
}

static unsigned long *dma_get_seg_table_origin(unsigned long *entry)
{
        unsigned long *sto;

        if (reg_entry_isvalid(*entry))
                sto = get_rt_sto(*entry);
        else {
                sto = dma_alloc_cpu_table();
                if (!sto)
                        return NULL;

                set_rt_sto(entry, sto);
                validate_rt_entry(entry);
                entry_clr_protected(entry);
        }
        return sto;
}

static unsigned long *dma_get_page_table_origin(unsigned long *entry)
{
        unsigned long *pto;

        if (reg_entry_isvalid(*entry))
                pto = get_st_pto(*entry);
        else {
                pto = dma_alloc_page_table();
                if (!pto)
                        return NULL;
                set_st_pto(entry, pto);
                validate_st_entry(entry);
                entry_clr_protected(entry);
        }
        return pto;
}

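/*
 * Walk the translation hierarchy (region table -> segment table ->
 * page table) for dma_addr, allocating missing intermediate tables on
 * the way, and return a pointer to the page-table entry, or NULL if a
 * table allocation fails.
 */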
unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr)
{
        unsigned long *sto, *pto;
        unsigned int rtx, sx, px;

        rtx = calc_rtx(dma_addr);
        sto = dma_get_seg_table_origin(&rto[rtx]);
        if (!sto)
                return NULL;

        sx = calc_sx(dma_addr);
        pto = dma_get_page_table_origin(&sto[sx]);
        if (!pto)
                return NULL;

        px = calc_px(dma_addr);
        return &pto[px];
}

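/*
 * Validate or invalidate a single page-table entry and set or clear
 * its protection bit according to flags.
 */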
void dma_update_cpu_trans(unsigned long *entry, void *page_addr, int flags)
{
        if (flags & ZPCI_PTE_INVALID) {
                invalidate_pt_entry(entry);
        } else {
                set_pt_pfaa(entry, page_addr);
                validate_pt_entry(entry);
        }

        if (flags & ZPCI_TABLE_PROTECTED)
                entry_set_protected(entry);
        else
                entry_clr_protected(entry);
}

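/*
 * Update the translation-table entries for a range of size bytes at
 * dma_addr and, where required, refresh the device's I/O translations
 * (rpcit). If the refresh fails, already validated entries are rolled
 * back.
 */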
static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
                            dma_addr_t dma_addr, size_t size, int flags)
{
        unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
        u8 *page_addr = (u8 *) (pa & PAGE_MASK);
        dma_addr_t start_dma_addr = dma_addr;
        unsigned long irq_flags;
        unsigned long *entry;
        int i, rc = 0;

        if (!nr_pages)
                return -EINVAL;

        spin_lock_irqsave(&zdev->dma_table_lock, irq_flags);
        if (!zdev->dma_table) {
                rc = -EINVAL;
                goto no_refresh;
        }

        for (i = 0; i < nr_pages; i++) {
                entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
                if (!entry) {
                        rc = -ENOMEM;
                        goto undo_cpu_trans;
                }
                dma_update_cpu_trans(entry, page_addr, flags);
                page_addr += PAGE_SIZE;
                dma_addr += PAGE_SIZE;
        }

        /*
         * With zdev->tlb_refresh == 0, rpcit is not required to establish new
         * translations when previously invalid translation-table entries are
         * validated. With lazy unmap, it also is skipped for previously valid
         * entries, but a global rpcit is then required before any address can
         * be re-used, i.e. after each iommu bitmap wrap-around.
         */
        if (!zdev->tlb_refresh &&
                        (!s390_iommu_strict ||
                        ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)))
                goto no_refresh;

        rc = zpci_refresh_trans((u64) zdev->fh << 32, start_dma_addr,
                                nr_pages * PAGE_SIZE);
undo_cpu_trans:
        if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) {
                flags = ZPCI_PTE_INVALID;
                while (i-- > 0) {
                        page_addr -= PAGE_SIZE;
                        dma_addr -= PAGE_SIZE;
                        entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
                        if (!entry)
                                break;
                        dma_update_cpu_trans(entry, page_addr, flags);
                }
        }

no_refresh:
        spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags);
        return rc;
}

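/*
 * Free the segment table referenced by a region-table entry, including
 * all page tables attached to it.
 */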
void dma_free_seg_table(unsigned long entry)
{
        unsigned long *sto = get_rt_sto(entry);
        int sx;

        for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++)
                if (reg_entry_isvalid(sto[sx]))
                        dma_free_page_table(get_st_pto(sto[sx]));

        dma_free_cpu_table(sto);
}

void dma_cleanup_tables(unsigned long *table)
{
        int rtx;

        if (!table)
                return;

        for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++)
                if (reg_entry_isvalid(table[rtx]))
                        dma_free_seg_table(table[rtx]);

        dma_free_cpu_table(table);
}

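/*
 * Find a free range of size iommu pages in the bitmap, starting the
 * search at start and respecting the device's DMA segment boundary.
 * Returns the page offset into the iommu address space, or -1 if no
 * suitable range is free.
 */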
static unsigned long __dma_alloc_iommu(struct device *dev,
                                       unsigned long start, int size)
{
        struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
        unsigned long boundary_size;

        boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
                              PAGE_SIZE) >> PAGE_SHIFT;
        return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages,
                                start, size, 0, boundary_size, 0);
}

static unsigned long dma_alloc_iommu(struct device *dev, int size)
{
        struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
        unsigned long offset, flags;
        int wrap = 0;

        spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
        offset = __dma_alloc_iommu(dev, zdev->next_bit, size);
        if (offset == -1) {
                /* wrap-around */
                offset = __dma_alloc_iommu(dev, 0, size);
                wrap = 1;
        }

        if (offset != -1) {
                zdev->next_bit = offset + size;
                if (!zdev->tlb_refresh && !s390_iommu_strict && wrap)
                        /* global flush after wrap-around with lazy unmap */
                        zpci_refresh_global(zdev);
        }
        spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
        return offset;
}

static void dma_free_iommu(struct device *dev, unsigned long offset, int size)
{
        struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
        unsigned long flags;

        spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
        if (!zdev->iommu_bitmap)
                goto out;
        bitmap_clear(zdev->iommu_bitmap, offset, size);
        /*
         * Lazy flush for unmap: need to move next_bit to avoid address re-use
         * until wrap-around.
         */
        if (!s390_iommu_strict && offset >= zdev->next_bit)
                zdev->next_bit = offset + size;
out:
        spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
}

static inline void zpci_err_dma(unsigned long rc, unsigned long addr)
{
        struct {
                unsigned long rc;
                unsigned long addr;
        } __packed data = {rc, addr};

        zpci_err_hex(&data, sizeof(data));
}

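/*
 * Map a (possibly multi-page) buffer for DMA: reserve a range of iommu
 * pages, install the translation-table entries and return the DMA
 * address. Entries are marked protected for DMA_NONE and DMA_TO_DEVICE
 * mappings.
 */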
static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
                                     unsigned long offset, size_t size,
                                     enum dma_data_direction direction,
                                     struct dma_attrs *attrs)
{
        struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
        unsigned long nr_pages, iommu_page_index;
        unsigned long pa = page_to_phys(page) + offset;
        int flags = ZPCI_PTE_VALID;
        dma_addr_t dma_addr;
        int ret;

        /* This rounds up number of pages based on size and offset */
        nr_pages = iommu_num_pages(pa, size, PAGE_SIZE);
        iommu_page_index = dma_alloc_iommu(dev, nr_pages);
        if (iommu_page_index == -1) {
                ret = -ENOSPC;
                goto out_err;
        }

        /* Use rounded up size */
        size = nr_pages * PAGE_SIZE;

        dma_addr = zdev->start_dma + iommu_page_index * PAGE_SIZE;
        if (dma_addr + size > zdev->end_dma) {
                ret = -ERANGE;
                goto out_free;
        }

        if (direction == DMA_NONE || direction == DMA_TO_DEVICE)
                flags |= ZPCI_TABLE_PROTECTED;

        ret = dma_update_trans(zdev, pa, dma_addr, size, flags);
        if (ret)
                goto out_free;

        atomic64_add(nr_pages, &zdev->mapped_pages);
        return dma_addr + (offset & ~PAGE_MASK);

out_free:
        dma_free_iommu(dev, iommu_page_index, nr_pages);
out_err:
        zpci_err("map error:\n");
        zpci_err_dma(ret, pa);
        return DMA_ERROR_CODE;
}

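/*
 * Undo a mapping created by s390_dma_map_pages(): invalidate the
 * translation-table entries and release the iommu page range.
 */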
static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
                                 size_t size, enum dma_data_direction direction,
                                 struct dma_attrs *attrs)
{
        struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
        unsigned long iommu_page_index;
        int npages, ret;

        npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
        dma_addr = dma_addr & PAGE_MASK;
        ret = dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE,
                               ZPCI_PTE_INVALID);
        if (ret) {
                zpci_err("unmap error:\n");
                zpci_err_dma(ret, dma_addr);
                return;
        }

        atomic64_add(npages, &zdev->unmapped_pages);
        iommu_page_index = (dma_addr - zdev->start_dma) >> PAGE_SHIFT;
        dma_free_iommu(dev, iommu_page_index, npages);
}

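/* Allocate a zeroed, page-aligned buffer and map it for bidirectional DMA. */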
static void *s390_dma_alloc(struct device *dev, size_t size,
                            dma_addr_t *dma_handle, gfp_t flag,
                            struct dma_attrs *attrs)
{
        struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
        struct page *page;
        unsigned long pa;
        dma_addr_t map;

        size = PAGE_ALIGN(size);
        page = alloc_pages(flag, get_order(size));
        if (!page)
                return NULL;

        pa = page_to_phys(page);
        memset((void *) pa, 0, size);

        map = s390_dma_map_pages(dev, page, 0, size, DMA_BIDIRECTIONAL, NULL);
        if (dma_mapping_error(dev, map)) {
                free_pages(pa, get_order(size));
                return NULL;
        }

        atomic64_add(size / PAGE_SIZE, &zdev->allocated_pages);
        if (dma_handle)
                *dma_handle = map;
        return (void *) pa;
}

static void s390_dma_free(struct device *dev, size_t size,
                          void *pa, dma_addr_t dma_handle,
                          struct dma_attrs *attrs)
{
        struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));

        size = PAGE_ALIGN(size);
        atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages);
        s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
        free_pages((unsigned long) pa, get_order(size));
}

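/*
 * Map each scatterlist element separately; if one element fails to
 * map, unmap everything mapped so far and return 0.
 */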
static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
                           int nr_elements, enum dma_data_direction dir,
                           struct dma_attrs *attrs)
{
        int mapped_elements = 0;
        struct scatterlist *s;
        int i;

        for_each_sg(sg, s, nr_elements, i) {
                struct page *page = sg_page(s);
                s->dma_address = s390_dma_map_pages(dev, page, s->offset,
                                                    s->length, dir, NULL);
                if (!dma_mapping_error(dev, s->dma_address)) {
                        s->dma_length = s->length;
                        mapped_elements++;
                } else
                        goto unmap;
        }
out:
        return mapped_elements;

unmap:
        for_each_sg(sg, s, mapped_elements, i) {
                if (s->dma_address)
                        s390_dma_unmap_pages(dev, s->dma_address, s->dma_length,
                                             dir, NULL);
                s->dma_address = 0;
                s->dma_length = 0;
        }
        mapped_elements = 0;
        goto out;
}

static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
                              int nr_elements, enum dma_data_direction dir,
                              struct dma_attrs *attrs)
{
        struct scatterlist *s;
        int i;

        for_each_sg(sg, s, nr_elements, i) {
                s390_dma_unmap_pages(dev, s->dma_address, s->dma_length, dir, NULL);
                s->dma_address = 0;
                s->dma_length = 0;
        }
}

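/*
 * Set up the DMA translation table and iommu bitmap for a PCI function
 * and register the table with the hardware via zpci_register_ioat().
 */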
int zpci_dma_init_device(struct zpci_dev *zdev)
{
        int rc;

        /*
         * At this point, if the device is part of an IOMMU domain, this would
         * be a strong hint towards a bug in the IOMMU API (common) code and/or
         * simultaneous access via IOMMU and DMA API. So let's issue a warning.
         */
        WARN_ON(zdev->s390_domain);

        spin_lock_init(&zdev->iommu_bitmap_lock);
        spin_lock_init(&zdev->dma_table_lock);

        zdev->dma_table = dma_alloc_cpu_table();
        if (!zdev->dma_table) {
                rc = -ENOMEM;
                goto out;
        }

        /*
         * Restrict the iommu bitmap size to the minimum of the following:
         * - main memory size
         * - 3-level pagetable address limit minus start_dma offset
         * - DMA address range allowed by the hardware (clp query pci fn)
         *
         * Also set zdev->end_dma to the actual end address of the usable
         * range, instead of the theoretical maximum as reported by hardware.
         */
        zdev->iommu_size = min3((u64) high_memory,
                                ZPCI_TABLE_SIZE_RT - zdev->start_dma,
                                zdev->end_dma - zdev->start_dma + 1);
        zdev->end_dma = zdev->start_dma + zdev->iommu_size - 1;
        zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT;
        zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8);
        if (!zdev->iommu_bitmap) {
                rc = -ENOMEM;
                goto free_dma_table;
        }

        rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
                                (u64) zdev->dma_table);
        if (rc)
                goto free_bitmap;

        return 0;
free_bitmap:
        vfree(zdev->iommu_bitmap);
        zdev->iommu_bitmap = NULL;
free_dma_table:
        dma_free_cpu_table(zdev->dma_table);
        zdev->dma_table = NULL;
out:
        return rc;
}

void zpci_dma_exit_device(struct zpci_dev *zdev)
{
        /*
         * At this point, if the device is part of an IOMMU domain, this would
         * be a strong hint towards a bug in the IOMMU API (common) code and/or
         * simultaneous access via IOMMU and DMA API. So let's issue a warning.
         */
        WARN_ON(zdev->s390_domain);

        zpci_unregister_ioat(zdev, 0);
        dma_cleanup_tables(zdev->dma_table);
        zdev->dma_table = NULL;
        vfree(zdev->iommu_bitmap);
        zdev->iommu_bitmap = NULL;
        zdev->next_bit = 0;
}

static int __init dma_alloc_cpu_table_caches(void)
{
        dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables",
                                        ZPCI_TABLE_SIZE, ZPCI_TABLE_ALIGN,
                                        0, NULL);
        if (!dma_region_table_cache)
                return -ENOMEM;

        dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables",
                                        ZPCI_PT_SIZE, ZPCI_PT_ALIGN,
                                        0, NULL);
        if (!dma_page_table_cache) {
                kmem_cache_destroy(dma_region_table_cache);
                return -ENOMEM;
        }
        return 0;
}

int __init zpci_dma_init(void)
{
        return dma_alloc_cpu_table_caches();
}

void zpci_dma_exit(void)
{
        kmem_cache_destroy(dma_page_table_cache);
        kmem_cache_destroy(dma_region_table_cache);
}

#define PREALLOC_DMA_DEBUG_ENTRIES      (1 << 16)

static int __init dma_debug_do_init(void)
{
        dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
        return 0;
}
fs_initcall(dma_debug_do_init);

struct dma_map_ops s390_pci_dma_ops = {
        .alloc          = s390_dma_alloc,
        .free           = s390_dma_free,
        .map_sg         = s390_dma_map_sg,
        .unmap_sg       = s390_dma_unmap_sg,
        .map_page       = s390_dma_map_pages,
        .unmap_page     = s390_dma_unmap_pages,
        /* if we support direct DMA this must be conditional */
        .is_phys        = 0,
        /* dma_supported is unconditionally true without a callback */
};
EXPORT_SYMBOL_GPL(s390_pci_dma_ops);

static int __init s390_iommu_setup(char *str)
{
        if (!strncmp(str, "strict", 6))
                s390_iommu_strict = 1;
        return 0;
}

__setup("s390_iommu=", s390_iommu_setup);