linux/arch/s390/pci/pci_dma.c
/*
 * Copyright IBM Corp. 2012
 *
 * Author(s):
 *   Jan Glauber <jang@linux.vnet.ibm.com>
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/iommu-helper.h>
#include <linux/dma-mapping.h>
#include <linux/vmalloc.h>
#include <linux/pci.h>
#include <asm/pci_dma.h>

static struct kmem_cache *dma_region_table_cache;
static struct kmem_cache *dma_page_table_cache;
static int s390_iommu_strict;

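/*
 * Refresh the function's I/O translations (rpcit) for its complete DMA
 * aperture, i.e. all iommu_pages starting at start_dma.
 */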
static int zpci_refresh_global(struct zpci_dev *zdev)
{
        return zpci_refresh_trans((u64) zdev->fh << 32, zdev->start_dma,
                                  zdev->iommu_pages * PAGE_SIZE);
}

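/*
 * Allocate a region/segment table from its kmem cache and mark all
 * entries invalid. Region tables and segment tables share the same
 * entry format, so this helper serves both levels.
 */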
unsigned long *dma_alloc_cpu_table(void)
{
        unsigned long *table, *entry;

        table = kmem_cache_alloc(dma_region_table_cache, GFP_ATOMIC);
        if (!table)
                return NULL;

        for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++)
                *entry = ZPCI_TABLE_INVALID;
        return table;
}

static void dma_free_cpu_table(void *table)
{
        kmem_cache_free(dma_region_table_cache, table);
}

static unsigned long *dma_alloc_page_table(void)
{
        unsigned long *table, *entry;

        table = kmem_cache_alloc(dma_page_table_cache, GFP_ATOMIC);
        if (!table)
                return NULL;

        for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++)
                *entry = ZPCI_PTE_INVALID;
        return table;
}

static void dma_free_page_table(void *table)
{
        kmem_cache_free(dma_page_table_cache, table);
}

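/*
 * Return the segment-table origin referenced by a region-table entry,
 * allocating and installing a new segment table if the entry is still
 * invalid. dma_get_page_table_origin() below does the same one level
 * further down for the page table.
 */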
static unsigned long *dma_get_seg_table_origin(unsigned long *entry)
{
        unsigned long *sto;

        if (reg_entry_isvalid(*entry))
                sto = get_rt_sto(*entry);
        else {
                sto = dma_alloc_cpu_table();
                if (!sto)
                        return NULL;

                set_rt_sto(entry, sto);
                validate_rt_entry(entry);
                entry_clr_protected(entry);
        }
        return sto;
}

static unsigned long *dma_get_page_table_origin(unsigned long *entry)
{
        unsigned long *pto;

        if (reg_entry_isvalid(*entry))
                pto = get_st_pto(*entry);
        else {
                pto = dma_alloc_page_table();
                if (!pto)
                        return NULL;
                set_st_pto(entry, pto);
                validate_st_entry(entry);
                entry_clr_protected(entry);
        }
        return pto;
}

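/*
 * Walk the CPU-side translation tables for @dma_addr (region table ->
 * segment table -> page table), allocating missing intermediate tables
 * on demand. Returns a pointer to the page-table entry, or NULL if an
 * allocation failed.
 */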
unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr)
{
        unsigned long *sto, *pto;
        unsigned int rtx, sx, px;

        rtx = calc_rtx(dma_addr);
        sto = dma_get_seg_table_origin(&rto[rtx]);
        if (!sto)
                return NULL;

        sx = calc_sx(dma_addr);
        pto = dma_get_page_table_origin(&sto[sx]);
        if (!pto)
                return NULL;

        px = calc_px(dma_addr);
        return &pto[px];
}

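/*
 * Update a single page-table entry: either invalidate it or point it at
 * @page_addr, then set or clear the protection bit according to @flags.
 */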
void dma_update_cpu_trans(unsigned long *entry, void *page_addr, int flags)
{
        if (flags & ZPCI_PTE_INVALID) {
                invalidate_pt_entry(entry);
        } else {
                set_pt_pfaa(entry, page_addr);
                validate_pt_entry(entry);
        }

        if (flags & ZPCI_TABLE_PROTECTED)
                entry_set_protected(entry);
        else
                entry_clr_protected(entry);
}

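/*
 * Map or unmap a contiguous range in the CPU-side translation tables and,
 * where required, refresh the device's cached translations. If the refresh
 * of a new mapping fails, the already validated entries are invalidated
 * again before returning the error.
 */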
static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
                            dma_addr_t dma_addr, size_t size, int flags)
{
        unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
        u8 *page_addr = (u8 *) (pa & PAGE_MASK);
        dma_addr_t start_dma_addr = dma_addr;
        unsigned long irq_flags;
        unsigned long *entry;
        int i, rc = 0;

        if (!nr_pages)
                return -EINVAL;

        spin_lock_irqsave(&zdev->dma_table_lock, irq_flags);
        if (!zdev->dma_table) {
                rc = -EINVAL;
                goto no_refresh;
        }

        for (i = 0; i < nr_pages; i++) {
                entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
                if (!entry) {
                        rc = -ENOMEM;
                        goto undo_cpu_trans;
                }
                dma_update_cpu_trans(entry, page_addr, flags);
                page_addr += PAGE_SIZE;
                dma_addr += PAGE_SIZE;
        }

        /*
         * With zdev->tlb_refresh == 0, rpcit is not required to establish new
         * translations when previously invalid translation-table entries are
         * validated. With lazy unmap, it is also skipped for previously valid
         * entries, but a global rpcit is then required before any address can
         * be re-used, i.e. after each iommu bitmap wrap-around.
         */
        if (!zdev->tlb_refresh &&
                        (!s390_iommu_strict ||
                        ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)))
                goto no_refresh;

        rc = zpci_refresh_trans((u64) zdev->fh << 32, start_dma_addr,
                                nr_pages * PAGE_SIZE);
undo_cpu_trans:
        if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) {
                flags = ZPCI_PTE_INVALID;
                while (i-- > 0) {
                        page_addr -= PAGE_SIZE;
                        dma_addr -= PAGE_SIZE;
                        entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
                        if (!entry)
                                break;
                        dma_update_cpu_trans(entry, page_addr, flags);
                }
        }

no_refresh:
        spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags);
        return rc;
}

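/*
 * Free a segment table together with all page tables it references;
 * dma_cleanup_tables() below does the same for an entire region table.
 */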
void dma_free_seg_table(unsigned long entry)
{
        unsigned long *sto = get_rt_sto(entry);
        int sx;

        for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++)
                if (reg_entry_isvalid(sto[sx]))
                        dma_free_page_table(get_st_pto(sto[sx]));

        dma_free_cpu_table(sto);
}

void dma_cleanup_tables(unsigned long *table)
{
        int rtx;

        if (!table)
                return;

        for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++)
                if (reg_entry_isvalid(table[rtx]))
                        dma_free_seg_table(table[rtx]);

        dma_free_cpu_table(table);
}

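/*
 * Search the iommu bitmap for a run of @size free pages starting at
 * @start, honouring the device's DMA segment boundary. Returns the page
 * index or -1 if no suitable range was found.
 */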
static unsigned long __dma_alloc_iommu(struct zpci_dev *zdev,
                                       unsigned long start, int size)
{
        unsigned long boundary_size;

        boundary_size = ALIGN(dma_get_seg_boundary(&zdev->pdev->dev) + 1,
                              PAGE_SIZE) >> PAGE_SHIFT;
        return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages,
                                start, size, 0, boundary_size, 0);
}

static unsigned long dma_alloc_iommu(struct zpci_dev *zdev, int size)
{
        unsigned long offset, flags;
        int wrap = 0;

        spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
        offset = __dma_alloc_iommu(zdev, zdev->next_bit, size);
        if (offset == -1) {
                /* wrap-around */
                offset = __dma_alloc_iommu(zdev, 0, size);
                wrap = 1;
        }

        if (offset != -1) {
                zdev->next_bit = offset + size;
                if (!zdev->tlb_refresh && !s390_iommu_strict && wrap)
                        /* global flush after wrap-around with lazy unmap */
                        zpci_refresh_global(zdev);
        }
        spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
        return offset;
}

static void dma_free_iommu(struct zpci_dev *zdev, unsigned long offset, int size)
{
        unsigned long flags;

        spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
        if (!zdev->iommu_bitmap)
                goto out;
        bitmap_clear(zdev->iommu_bitmap, offset, size);
        /*
         * Lazy flush for unmap: need to move next_bit to avoid address re-use
         * until wrap-around.
         */
        if (!s390_iommu_strict && offset >= zdev->next_bit)
                zdev->next_bit = offset + size;
out:
        spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
}

static inline void zpci_err_dma(unsigned long rc, unsigned long addr)
{
        struct {
                unsigned long rc;
                unsigned long addr;
        } __packed data = {rc, addr};

        zpci_err_hex(&data, sizeof(data));
}

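/*
 * map_page callback: reserve an iommu address range for the buffer,
 * create the translation-table entries and return the resulting DMA
 * address. For DMA_NONE and DMA_TO_DEVICE the entries are additionally
 * marked protected so the device cannot write to the pages.
 */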
static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
                                     unsigned long offset, size_t size,
                                     enum dma_data_direction direction,
                                     struct dma_attrs *attrs)
{
        struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
        unsigned long nr_pages, iommu_page_index;
        unsigned long pa = page_to_phys(page) + offset;
        int flags = ZPCI_PTE_VALID;
        dma_addr_t dma_addr;
        int ret;

        /* This rounds up number of pages based on size and offset */
        nr_pages = iommu_num_pages(pa, size, PAGE_SIZE);
        iommu_page_index = dma_alloc_iommu(zdev, nr_pages);
        if (iommu_page_index == -1) {
                ret = -ENOSPC;
                goto out_err;
        }

        /* Use rounded up size */
        size = nr_pages * PAGE_SIZE;

        dma_addr = zdev->start_dma + iommu_page_index * PAGE_SIZE;
        if (dma_addr + size > zdev->end_dma) {
                ret = -ERANGE;
                goto out_free;
        }

        if (direction == DMA_NONE || direction == DMA_TO_DEVICE)
                flags |= ZPCI_TABLE_PROTECTED;

        ret = dma_update_trans(zdev, pa, dma_addr, size, flags);
        if (ret)
                goto out_free;

        atomic64_add(nr_pages, &zdev->mapped_pages);
        return dma_addr + (offset & ~PAGE_MASK);

out_free:
        dma_free_iommu(zdev, iommu_page_index, nr_pages);
out_err:
        zpci_err("map error:\n");
        zpci_err_dma(ret, pa);
        return DMA_ERROR_CODE;
}

static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
                                 size_t size, enum dma_data_direction direction,
                                 struct dma_attrs *attrs)
{
        struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
        unsigned long iommu_page_index;
        int npages, ret;

        npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
        dma_addr = dma_addr & PAGE_MASK;
        ret = dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE,
                               ZPCI_PTE_INVALID);
        if (ret) {
                zpci_err("unmap error:\n");
                zpci_err_dma(ret, dma_addr);
                return;
        }

        atomic64_add(npages, &zdev->unmapped_pages);
        iommu_page_index = (dma_addr - zdev->start_dma) >> PAGE_SHIFT;
        dma_free_iommu(zdev, iommu_page_index, npages);
}

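/*
 * Allocate a zeroed, page-aligned buffer for dma_alloc_coherent() and map
 * it bidirectionally via s390_dma_map_pages().
 */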
static void *s390_dma_alloc(struct device *dev, size_t size,
                            dma_addr_t *dma_handle, gfp_t flag,
                            struct dma_attrs *attrs)
{
        struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
        struct page *page;
        unsigned long pa;
        dma_addr_t map;

        size = PAGE_ALIGN(size);
        page = alloc_pages(flag, get_order(size));
        if (!page)
                return NULL;

        pa = page_to_phys(page);
        memset((void *) pa, 0, size);

        map = s390_dma_map_pages(dev, page, pa % PAGE_SIZE,
                                 size, DMA_BIDIRECTIONAL, NULL);
        if (dma_mapping_error(dev, map)) {
                free_pages(pa, get_order(size));
                return NULL;
        }

        atomic64_add(size / PAGE_SIZE, &zdev->allocated_pages);
        if (dma_handle)
                *dma_handle = map;
        return (void *) pa;
}

static void s390_dma_free(struct device *dev, size_t size,
                          void *pa, dma_addr_t dma_handle,
                          struct dma_attrs *attrs)
{
        struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));

        size = PAGE_ALIGN(size);
        atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages);
        s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
        free_pages((unsigned long) pa, get_order(size));
}

static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
                           int nr_elements, enum dma_data_direction dir,
                           struct dma_attrs *attrs)
{
        int mapped_elements = 0;
        struct scatterlist *s;
        int i;

        for_each_sg(sg, s, nr_elements, i) {
                struct page *page = sg_page(s);
                s->dma_address = s390_dma_map_pages(dev, page, s->offset,
                                                    s->length, dir, NULL);
                if (!dma_mapping_error(dev, s->dma_address)) {
                        s->dma_length = s->length;
                        mapped_elements++;
                } else
                        goto unmap;
        }
out:
        return mapped_elements;

unmap:
        for_each_sg(sg, s, mapped_elements, i) {
                if (s->dma_address)
                        s390_dma_unmap_pages(dev, s->dma_address, s->dma_length,
                                             dir, NULL);
                s->dma_address = 0;
                s->dma_length = 0;
        }
        mapped_elements = 0;
        goto out;
}

static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
                              int nr_elements, enum dma_data_direction dir,
                              struct dma_attrs *attrs)
{
        struct scatterlist *s;
        int i;

        for_each_sg(sg, s, nr_elements, i) {
                s390_dma_unmap_pages(dev, s->dma_address, s->dma_length, dir, NULL);
                s->dma_address = 0;
                s->dma_length = 0;
        }
}

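/*
 * Set up DMA translation for a PCI function: allocate the root translation
 * table and the iommu bitmap and register the translation table with the
 * function via zpci_register_ioat().
 */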
int zpci_dma_init_device(struct zpci_dev *zdev)
{
        int rc;

        /*
         * At this point, if the device is part of an IOMMU domain, this would
         * be a strong hint towards a bug in the IOMMU API (common) code and/or
         * simultaneous access via IOMMU and DMA API. So let's issue a warning.
         */
        WARN_ON(zdev->s390_domain);

        spin_lock_init(&zdev->iommu_bitmap_lock);
        spin_lock_init(&zdev->dma_table_lock);

        zdev->dma_table = dma_alloc_cpu_table();
        if (!zdev->dma_table) {
                rc = -ENOMEM;
                goto out_clean;
        }

        zdev->iommu_size = (unsigned long) high_memory - PAGE_OFFSET;
        zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT;
        zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8);
        if (!zdev->iommu_bitmap) {
                rc = -ENOMEM;
                goto out_reg;
        }

        rc = zpci_register_ioat(zdev,
                                0,
                                zdev->start_dma + PAGE_OFFSET,
                                zdev->start_dma + zdev->iommu_size - 1,
                                (u64) zdev->dma_table);
        if (rc)
                goto out_reg;
        return 0;

out_reg:
        dma_free_cpu_table(zdev->dma_table);
out_clean:
        return rc;
}

void zpci_dma_exit_device(struct zpci_dev *zdev)
{
        /*
         * At this point, if the device is part of an IOMMU domain, this would
         * be a strong hint towards a bug in the IOMMU API (common) code and/or
         * simultaneous access via IOMMU and DMA API. So let's issue a warning.
         */
        WARN_ON(zdev->s390_domain);

        zpci_unregister_ioat(zdev, 0);
        dma_cleanup_tables(zdev->dma_table);
        zdev->dma_table = NULL;
        vfree(zdev->iommu_bitmap);
        zdev->iommu_bitmap = NULL;
        zdev->next_bit = 0;
}

static int __init dma_alloc_cpu_table_caches(void)
{
        dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables",
                                        ZPCI_TABLE_SIZE, ZPCI_TABLE_ALIGN,
                                        0, NULL);
        if (!dma_region_table_cache)
                return -ENOMEM;

        dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables",
                                        ZPCI_PT_SIZE, ZPCI_PT_ALIGN,
                                        0, NULL);
        if (!dma_page_table_cache) {
                kmem_cache_destroy(dma_region_table_cache);
                return -ENOMEM;
        }
        return 0;
}

int __init zpci_dma_init(void)
{
        return dma_alloc_cpu_table_caches();
}

void zpci_dma_exit(void)
{
        kmem_cache_destroy(dma_page_table_cache);
        kmem_cache_destroy(dma_region_table_cache);
}

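/* Preallocate a fixed number of dma-debug entries at boot. */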
#define PREALLOC_DMA_DEBUG_ENTRIES      (1 << 16)

static int __init dma_debug_do_init(void)
{
        dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
        return 0;
}
fs_initcall(dma_debug_do_init);

struct dma_map_ops s390_dma_ops = {
        .alloc          = s390_dma_alloc,
        .free           = s390_dma_free,
        .map_sg         = s390_dma_map_sg,
        .unmap_sg       = s390_dma_unmap_sg,
        .map_page       = s390_dma_map_pages,
        .unmap_page     = s390_dma_unmap_pages,
        /* if we support direct DMA this must be conditional */
        .is_phys        = 0,
        /* dma_supported is unconditionally true without a callback */
};
EXPORT_SYMBOL_GPL(s390_dma_ops);

static int __init s390_iommu_setup(char *str)
{
        if (!strncmp(str, "strict", 6))
                s390_iommu_strict = 1;
        return 0;
}

__setup("s390_iommu=", s390_iommu_setup);