linux/arch/powerpc/platforms/powernv/pci-ioda-tce.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0+
   2/*
   3 * TCE helpers for IODA PCI/PCIe on PowerNV platforms
   4 *
   5 * Copyright 2018 IBM Corp.
   6 *
   7 * This program is free software; you can redistribute it and/or
   8 * modify it under the terms of the GNU General Public License
   9 * as published by the Free Software Foundation; either version
  10 * 2 of the License, or (at your option) any later version.
  11 */
  12
  13#include <linux/kernel.h>
  14#include <linux/iommu.h>
  15
  16#include <asm/iommu.h>
  17#include <asm/tce.h>
  18#include "pci.h"
  19
  20unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb)
  21{
  22        struct pci_controller *hose = phb->hose;
  23        struct device_node *dn = hose->dn;
  24        unsigned long mask = 0;
  25        int i, rc, count;
  26        u32 val;
  27
  28        count = of_property_count_u32_elems(dn, "ibm,supported-tce-sizes");
  29        if (count <= 0) {
  30                mask = SZ_4K | SZ_64K;
  31                /* Add 16M for POWER8 by default */
  32                if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
  33                                !cpu_has_feature(CPU_FTR_ARCH_300))
  34                        mask |= SZ_16M | SZ_256M;
  35                return mask;
  36        }
  37
  38        for (i = 0; i < count; i++) {
  39                rc = of_property_read_u32_index(dn, "ibm,supported-tce-sizes",
  40                                                i, &val);
  41                if (rc == 0)
  42                        mask |= 1ULL << val;
  43        }
  44
  45        return mask;
  46}
  47
  48void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
  49                void *tce_mem, u64 tce_size,
  50                u64 dma_offset, unsigned int page_shift)
  51{
  52        tbl->it_blocksize = 16;
  53        tbl->it_base = (unsigned long)tce_mem;
  54        tbl->it_page_shift = page_shift;
  55        tbl->it_offset = dma_offset >> tbl->it_page_shift;
  56        tbl->it_index = 0;
  57        tbl->it_size = tce_size >> 3;
  58        tbl->it_busno = 0;
  59        tbl->it_type = TCE_PCI;
  60}
  61
  62static __be64 *pnv_alloc_tce_level(int nid, unsigned int shift)
  63{
  64        struct page *tce_mem = NULL;
  65        __be64 *addr;
  66
  67        tce_mem = alloc_pages_node(nid, GFP_ATOMIC | __GFP_NOWARN,
  68                        shift - PAGE_SHIFT);
  69        if (!tce_mem) {
  70                pr_err("Failed to allocate a TCE memory, level shift=%d\n",
  71                                shift);
  72                return NULL;
  73        }
  74        addr = page_address(tce_mem);
  75        memset(addr, 0, 1UL << shift);
  76
  77        return addr;
  78}
  79
/*
 * Forward declaration: pnv_tce() below calls this helper, whose definition
 * appears further down in this file.
 */
static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
		unsigned long size, unsigned int levels);
  82
  83static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx, bool alloc)
  84{
  85        __be64 *tmp = user ? tbl->it_userspace : (__be64 *) tbl->it_base;
  86        int  level = tbl->it_indirect_levels;
  87        const long shift = ilog2(tbl->it_level_size);
  88        unsigned long mask = (tbl->it_level_size - 1) << (level * shift);
  89
  90        while (level) {
  91                int n = (idx & mask) >> (level * shift);
  92                unsigned long oldtce, tce = be64_to_cpu(READ_ONCE(tmp[n]));
  93
  94                if (!tce) {
  95                        __be64 *tmp2;
  96
  97                        if (!alloc)
  98                                return NULL;
  99
 100                        tmp2 = pnv_alloc_tce_level(tbl->it_nid,
 101                                        ilog2(tbl->it_level_size) + 3);
 102                        if (!tmp2)
 103                                return NULL;
 104
 105                        tce = __pa(tmp2) | TCE_PCI_READ | TCE_PCI_WRITE;
 106                        oldtce = be64_to_cpu(cmpxchg(&tmp[n], 0,
 107                                        cpu_to_be64(tce)));
 108                        if (oldtce) {
 109                                pnv_pci_ioda2_table_do_free_pages(tmp2,
 110                                        ilog2(tbl->it_level_size) + 3, 1);
 111                                tce = oldtce;
 112                        }
 113                }
 114
 115                tmp = __va(tce & ~(TCE_PCI_READ | TCE_PCI_WRITE));
 116                idx &= ~mask;
 117                mask >>= shift;
 118                --level;
 119        }
 120
 121        return tmp + idx;
 122}
 123
 124int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
 125                unsigned long uaddr, enum dma_data_direction direction,
 126                unsigned long attrs)
 127{
 128        u64 proto_tce = iommu_direction_to_tce_perm(direction);
 129        u64 rpn = __pa(uaddr) >> tbl->it_page_shift;
 130        long i;
 131
 132        if (proto_tce & TCE_PCI_WRITE)
 133                proto_tce |= TCE_PCI_READ;
 134
 135        for (i = 0; i < npages; i++) {
 136                unsigned long newtce = proto_tce |
 137                        ((rpn + i) << tbl->it_page_shift);
 138                unsigned long idx = index - tbl->it_offset + i;
 139
 140                *(pnv_tce(tbl, false, idx, true)) = cpu_to_be64(newtce);
 141        }
 142
 143        return 0;
 144}
 145
 146#ifdef CONFIG_IOMMU_API
 147int pnv_tce_xchg(struct iommu_table *tbl, long index,
 148                unsigned long *hpa, enum dma_data_direction *direction,
 149                bool alloc)
 150{
 151        u64 proto_tce = iommu_direction_to_tce_perm(*direction);
 152        unsigned long newtce = *hpa | proto_tce, oldtce;
 153        unsigned long idx = index - tbl->it_offset;
 154        __be64 *ptce = NULL;
 155
 156        BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl));
 157
 158        if (*direction == DMA_NONE) {
 159                ptce = pnv_tce(tbl, false, idx, false);
 160                if (!ptce) {
 161                        *hpa = 0;
 162                        return 0;
 163                }
 164        }
 165
 166        if (!ptce) {
 167                ptce = pnv_tce(tbl, false, idx, alloc);
 168                if (!ptce)
 169                        return -ENOMEM;
 170        }
 171
 172        if (newtce & TCE_PCI_WRITE)
 173                newtce |= TCE_PCI_READ;
 174
 175        oldtce = be64_to_cpu(xchg(ptce, cpu_to_be64(newtce)));
 176        *hpa = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
 177        *direction = iommu_tce_direction(oldtce);
 178
 179        return 0;
 180}
 181
 182__be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index, bool alloc)
 183{
 184        if (WARN_ON_ONCE(!tbl->it_userspace))
 185                return NULL;
 186
 187        return pnv_tce(tbl, true, index - tbl->it_offset, alloc);
 188}
 189#endif
 190
 191void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
 192{
 193        long i;
 194
 195        for (i = 0; i < npages; i++) {
 196                unsigned long idx = index - tbl->it_offset + i;
 197                __be64 *ptce = pnv_tce(tbl, false, idx, false);
 198
 199                if (ptce)
 200                        *ptce = cpu_to_be64(0);
 201                else
 202                        /* Skip the rest of the level */
 203                        i |= tbl->it_level_size - 1;
 204        }
 205}
 206
 207unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
 208{
 209        __be64 *ptce = pnv_tce(tbl, false, index - tbl->it_offset, false);
 210
 211        if (!ptce)
 212                return 0;
 213
 214        return be64_to_cpu(*ptce);
 215}
 216
 217static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
 218                unsigned long size, unsigned int levels)
 219{
 220        const unsigned long addr_ul = (unsigned long) addr &
 221                        ~(TCE_PCI_READ | TCE_PCI_WRITE);
 222
 223        if (levels) {
 224                long i;
 225                u64 *tmp = (u64 *) addr_ul;
 226
 227                for (i = 0; i < size; ++i) {
 228                        unsigned long hpa = be64_to_cpu(tmp[i]);
 229
 230                        if (!(hpa & (TCE_PCI_READ | TCE_PCI_WRITE)))
 231                                continue;
 232
 233                        pnv_pci_ioda2_table_do_free_pages(__va(hpa), size,
 234                                        levels - 1);
 235                }
 236        }
 237
 238        free_pages(addr_ul, get_order(size << 3));
 239}
 240
 241void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl)
 242{
 243        const unsigned long size = tbl->it_indirect_levels ?
 244                        tbl->it_level_size : tbl->it_size;
 245
 246        if (!tbl->it_size)
 247                return;
 248
 249        pnv_pci_ioda2_table_do_free_pages((__be64 *)tbl->it_base, size,
 250                        tbl->it_indirect_levels);
 251        if (tbl->it_userspace) {
 252                pnv_pci_ioda2_table_do_free_pages(tbl->it_userspace, size,
 253                                tbl->it_indirect_levels);
 254        }
 255}
 256
 257static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned int shift,
 258                unsigned int levels, unsigned long limit,
 259                unsigned long *current_offset, unsigned long *total_allocated)
 260{
 261        __be64 *addr, *tmp;
 262        unsigned long allocated = 1UL << shift;
 263        unsigned int entries = 1UL << (shift - 3);
 264        long i;
 265
 266        addr = pnv_alloc_tce_level(nid, shift);
 267        *total_allocated += allocated;
 268
 269        --levels;
 270        if (!levels) {
 271                *current_offset += allocated;
 272                return addr;
 273        }
 274
 275        for (i = 0; i < entries; ++i) {
 276                tmp = pnv_pci_ioda2_table_do_alloc_pages(nid, shift,
 277                                levels, limit, current_offset, total_allocated);
 278                if (!tmp)
 279                        break;
 280
 281                addr[i] = cpu_to_be64(__pa(tmp) |
 282                                TCE_PCI_READ | TCE_PCI_WRITE);
 283
 284                if (*current_offset >= limit)
 285                        break;
 286        }
 287
 288        return addr;
 289}
 290
 291long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
 292                __u32 page_shift, __u64 window_size, __u32 levels,
 293                bool alloc_userspace_copy, struct iommu_table *tbl)
 294{
 295        void *addr, *uas = NULL;
 296        unsigned long offset = 0, level_shift, total_allocated = 0;
 297        unsigned long total_allocated_uas = 0;
 298        const unsigned int window_shift = ilog2(window_size);
 299        unsigned int entries_shift = window_shift - page_shift;
 300        unsigned int table_shift = max_t(unsigned int, entries_shift + 3,
 301                        PAGE_SHIFT);
 302        const unsigned long tce_table_size = 1UL << table_shift;
 303
 304        if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS))
 305                return -EINVAL;
 306
 307        if (!is_power_of_2(window_size))
 308                return -EINVAL;
 309
 310        /* Adjust direct table size from window_size and levels */
 311        entries_shift = (entries_shift + levels - 1) / levels;
 312        level_shift = entries_shift + 3;
 313        level_shift = max_t(unsigned int, level_shift, PAGE_SHIFT);
 314
 315        if ((level_shift - 3) * levels + page_shift >= 55)
 316                return -EINVAL;
 317
 318        /* Allocate TCE table */
 319        addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
 320                        1, tce_table_size, &offset, &total_allocated);
 321
 322        /* addr==NULL means that the first level allocation failed */
 323        if (!addr)
 324                return -ENOMEM;
 325
 326        /*
 327         * First level was allocated but some lower level failed as
 328         * we did not allocate as much as we wanted,
 329         * release partially allocated table.
 330         */
 331        if (levels == 1 && offset < tce_table_size)
 332                goto free_tces_exit;
 333
 334        /* Allocate userspace view of the TCE table */
 335        if (alloc_userspace_copy) {
 336                offset = 0;
 337                uas = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
 338                                1, tce_table_size, &offset,
 339                                &total_allocated_uas);
 340                if (!uas)
 341                        goto free_tces_exit;
 342                if (levels == 1 && (offset < tce_table_size ||
 343                                total_allocated_uas != total_allocated))
 344                        goto free_uas_exit;
 345        }
 346
 347        /* Setup linux iommu table */
 348        pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, bus_offset,
 349                        page_shift);
 350        tbl->it_level_size = 1ULL << (level_shift - 3);
 351        tbl->it_indirect_levels = levels - 1;
 352        tbl->it_userspace = uas;
 353        tbl->it_nid = nid;
 354
 355        pr_debug("Created TCE table: ws=%08llx ts=%lx @%08llx base=%lx uas=%p levels=%d/%d\n",
 356                        window_size, tce_table_size, bus_offset, tbl->it_base,
 357                        tbl->it_userspace, 1, levels);
 358
 359        return 0;
 360
 361free_uas_exit:
 362        pnv_pci_ioda2_table_do_free_pages(uas,
 363                        1ULL << (level_shift - 3), levels - 1);
 364free_tces_exit:
 365        pnv_pci_ioda2_table_do_free_pages(addr,
 366                        1ULL << (level_shift - 3), levels - 1);
 367
 368        return -ENOMEM;
 369}
 370
 371void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
 372                struct iommu_table_group *table_group)
 373{
 374        long i;
 375        bool found;
 376        struct iommu_table_group_link *tgl;
 377
 378        if (!tbl || !table_group)
 379                return;
 380
 381        /* Remove link to a group from table's list of attached groups */
 382        found = false;
 383
 384        rcu_read_lock();
 385        list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) {
 386                if (tgl->table_group == table_group) {
 387                        list_del_rcu(&tgl->next);
 388                        kfree_rcu(tgl, rcu);
 389                        found = true;
 390                        break;
 391                }
 392        }
 393        rcu_read_unlock();
 394
 395        if (WARN_ON(!found))
 396                return;
 397
 398        /* Clean a pointer to iommu_table in iommu_table_group::tables[] */
 399        found = false;
 400        for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
 401                if (table_group->tables[i] == tbl) {
 402                        iommu_tce_table_put(tbl);
 403                        table_group->tables[i] = NULL;
 404                        found = true;
 405                        break;
 406                }
 407        }
 408        WARN_ON(!found);
 409}
 410
 411long pnv_pci_link_table_and_group(int node, int num,
 412                struct iommu_table *tbl,
 413                struct iommu_table_group *table_group)
 414{
 415        struct iommu_table_group_link *tgl = NULL;
 416
 417        if (WARN_ON(!tbl || !table_group))
 418                return -EINVAL;
 419
 420        tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL,
 421                        node);
 422        if (!tgl)
 423                return -ENOMEM;
 424
 425        tgl->table_group = table_group;
 426        list_add_rcu(&tgl->next, &tbl->it_group_list);
 427
 428        table_group->tables[num] = iommu_tce_table_get(tbl);
 429
 430        return 0;
 431}
 432