linux/arch/powerpc/platforms/powernv/ocxl.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0+
   2// Copyright 2017 IBM Corp.
   3#include <asm/pnv-ocxl.h>
   4#include <asm/opal.h>
   5#include <misc/ocxl-config.h>
   6#include "pci.h"
   7
   8#define PNV_OCXL_TL_P9_RECV_CAP         0x000000000000000Full
   9#define PNV_OCXL_ACTAG_MAX              64
  10/* PASIDs are 20-bit, but on P9, NPU can only handle 15 bits */
  11#define PNV_OCXL_PASID_BITS             15
  12#define PNV_OCXL_PASID_MAX              ((1 << PNV_OCXL_PASID_BITS) - 1)
  13
  14#define AFU_PRESENT (1 << 31)
  15#define AFU_INDEX_MASK 0x3F000000
  16#define AFU_INDEX_SHIFT 24
  17#define ACTAG_MASK 0xFFF
  18
  19
  20struct actag_range {
  21        u16 start;
  22        u16 count;
  23};
  24
  25struct npu_link {
  26        struct list_head list;
  27        int domain;
  28        int bus;
  29        int dev;
  30        u16 fn_desired_actags[8];
  31        struct actag_range fn_actags[8];
  32        bool assignment_done;
  33};
  34static struct list_head links_list = LIST_HEAD_INIT(links_list);
  35static DEFINE_MUTEX(links_list_lock);
  36
  37
  38/*
  39 * opencapi actags handling:
  40 *
  41 * When sending commands, the opencapi device references the memory
  42 * context it's targeting with an 'actag', which is really an alias
  43 * for a (BDF, pasid) combination. When it receives a command, the NPU
  44 * must do a lookup of the actag to identify the memory context. The
  45 * hardware supports a finite number of actags per link (64 for
  46 * POWER9).
  47 *
  48 * The device can carry multiple functions, and each function can have
  49 * multiple AFUs. Each AFU advertises in its config space the number
  50 * of desired actags. The host must configure in the config space of
  51 * the AFU how many actags the AFU is really allowed to use (which can
  52 * be less than what the AFU desires).
  53 *
  54 * When a PCI function is probed by the driver, it has no visibility
  55 * about the other PCI functions and how many actags they'd like,
  56 * which makes it impossible to distribute actags fairly among AFUs.
  57 *
 * Unfortunately, the only way to know how many actags a function
 * desires is by looking at the data for each AFU in the config space
 * and adding them up. Similarly, the only way to know how many actags
 * all the functions of the physical device desire is by adding the
 * previously computed function counts. Then we can match that against
 * what the hardware supports.
  64 *
 * To get a comprehensive view, we use a 'pci fixup': at the end of
 * PCI enumeration, each function counts how many actags its AFUs
 * desire and we save it in an 'npu_link' structure, shared between all
 * the PCI functions of the same device. Therefore, when the first
 * function is probed by the driver, we can get an idea of the total
 * count of desired actags for the device, and assign the actags to
 * the AFUs, by pro-rating if needed.
  72 */
  73
  74static int find_dvsec_from_pos(struct pci_dev *dev, int dvsec_id, int pos)
  75{
  76        int vsec = pos;
  77        u16 vendor, id;
  78
  79        while ((vsec = pci_find_next_ext_capability(dev, vsec,
  80                                                    OCXL_EXT_CAP_ID_DVSEC))) {
  81                pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET,
  82                                &vendor);
  83                pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id);
  84                if (vendor == PCI_VENDOR_ID_IBM && id == dvsec_id)
  85                        return vsec;
  86        }
  87        return 0;
  88}
  89
  90static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx)
  91{
  92        int vsec = 0;
  93        u8 idx;
  94
  95        while ((vsec = find_dvsec_from_pos(dev, OCXL_DVSEC_AFU_CTRL_ID,
  96                                           vsec))) {
  97                pci_read_config_byte(dev, vsec + OCXL_DVSEC_AFU_CTRL_AFU_IDX,
  98                                &idx);
  99                if (idx == afu_idx)
 100                        return vsec;
 101        }
 102        return 0;
 103}
 104
 105static int get_max_afu_index(struct pci_dev *dev, int *afu_idx)
 106{
 107        int pos;
 108        u32 val;
 109
 110        pos = find_dvsec_from_pos(dev, OCXL_DVSEC_FUNC_ID, 0);
 111        if (!pos)
 112                return -ESRCH;
 113
 114        pci_read_config_dword(dev, pos + OCXL_DVSEC_FUNC_OFF_INDEX, &val);
 115        if (val & AFU_PRESENT)
 116                *afu_idx = (val & AFU_INDEX_MASK) >> AFU_INDEX_SHIFT;
 117        else
 118                *afu_idx = -1;
 119        return 0;
 120}
 121
 122static int get_actag_count(struct pci_dev *dev, int afu_idx, int *actag)
 123{
 124        int pos;
 125        u16 actag_sup;
 126
 127        pos = find_dvsec_afu_ctrl(dev, afu_idx);
 128        if (!pos)
 129                return -ESRCH;
 130
 131        pci_read_config_word(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_SUP,
 132                        &actag_sup);
 133        *actag = actag_sup & ACTAG_MASK;
 134        return 0;
 135}
 136
 137static struct npu_link *find_link(struct pci_dev *dev)
 138{
 139        struct npu_link *link;
 140
 141        list_for_each_entry(link, &links_list, list) {
 142                /* The functions of a device all share the same link */
 143                if (link->domain == pci_domain_nr(dev->bus) &&
 144                        link->bus == dev->bus->number &&
 145                        link->dev == PCI_SLOT(dev->devfn)) {
 146                        return link;
 147                }
 148        }
 149
 150        /* link doesn't exist yet. Allocate one */
 151        link = kzalloc(sizeof(struct npu_link), GFP_KERNEL);
 152        if (!link)
 153                return NULL;
 154        link->domain = pci_domain_nr(dev->bus);
 155        link->bus = dev->bus->number;
 156        link->dev = PCI_SLOT(dev->devfn);
 157        list_add(&link->list, &links_list);
 158        return link;
 159}
 160
 161static void pnv_ocxl_fixup_actag(struct pci_dev *dev)
 162{
 163        struct pci_controller *hose = pci_bus_to_host(dev->bus);
 164        struct pnv_phb *phb = hose->private_data;
 165        struct npu_link *link;
 166        int rc, afu_idx = -1, i, actag;
 167
 168        if (!machine_is(powernv))
 169                return;
 170
 171        if (phb->type != PNV_PHB_NPU_OCAPI)
 172                return;
 173
 174        mutex_lock(&links_list_lock);
 175
 176        link = find_link(dev);
 177        if (!link) {
 178                dev_warn(&dev->dev, "couldn't update actag information\n");
 179                mutex_unlock(&links_list_lock);
 180                return;
 181        }
 182
 183        /*
 184         * Check how many actags are desired for the AFUs under that
 185         * function and add it to the count for the link
 186         */
 187        rc = get_max_afu_index(dev, &afu_idx);
 188        if (rc) {
 189                /* Most likely an invalid config space */
 190                dev_dbg(&dev->dev, "couldn't find AFU information\n");
 191                afu_idx = -1;
 192        }
 193
 194        link->fn_desired_actags[PCI_FUNC(dev->devfn)] = 0;
 195        for (i = 0; i <= afu_idx; i++) {
 196                /*
 197                 * AFU index 'holes' are allowed. So don't fail if we
 198                 * can't read the actag info for an index
 199                 */
 200                rc = get_actag_count(dev, i, &actag);
 201                if (rc)
 202                        continue;
 203                link->fn_desired_actags[PCI_FUNC(dev->devfn)] += actag;
 204        }
 205        dev_dbg(&dev->dev, "total actags for function: %d\n",
 206                link->fn_desired_actags[PCI_FUNC(dev->devfn)]);
 207
 208        mutex_unlock(&links_list_lock);
 209}
 210DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pnv_ocxl_fixup_actag);
 211
 212static u16 assign_fn_actags(u16 desired, u16 total)
 213{
 214        u16 count;
 215
 216        if (total <= PNV_OCXL_ACTAG_MAX)
 217                count = desired;
 218        else
 219                count = PNV_OCXL_ACTAG_MAX * desired / total;
 220
 221        return count;
 222}
 223
 224static void assign_actags(struct npu_link *link)
 225{
 226        u16 actag_count, range_start = 0, total_desired = 0;
 227        int i;
 228
 229        for (i = 0; i < 8; i++)
 230                total_desired += link->fn_desired_actags[i];
 231
 232        for (i = 0; i < 8; i++) {
 233                if (link->fn_desired_actags[i]) {
 234                        actag_count = assign_fn_actags(
 235                                link->fn_desired_actags[i],
 236                                total_desired);
 237                        link->fn_actags[i].start = range_start;
 238                        link->fn_actags[i].count = actag_count;
 239                        range_start += actag_count;
 240                        WARN_ON(range_start >= PNV_OCXL_ACTAG_MAX);
 241                }
 242                pr_debug("link %x:%x:%x fct %d actags: start=%d count=%d (desired=%d)\n",
 243                        link->domain, link->bus, link->dev, i,
 244                        link->fn_actags[i].start, link->fn_actags[i].count,
 245                        link->fn_desired_actags[i]);
 246        }
 247        link->assignment_done = true;
 248}
 249
 250int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled,
 251                u16 *supported)
 252{
 253        struct npu_link *link;
 254
 255        mutex_lock(&links_list_lock);
 256
 257        link = find_link(dev);
 258        if (!link) {
 259                dev_err(&dev->dev, "actag information not found\n");
 260                mutex_unlock(&links_list_lock);
 261                return -ENODEV;
 262        }
 263        /*
 264         * On p9, we only have 64 actags per link, so they must be
 265         * shared by all the functions of the same adapter. We counted
 266         * the desired actag counts during PCI enumeration, so that we
 267         * can allocate a pro-rated number of actags to each function.
 268         */
 269        if (!link->assignment_done)
 270                assign_actags(link);
 271
 272        *base      = link->fn_actags[PCI_FUNC(dev->devfn)].start;
 273        *enabled   = link->fn_actags[PCI_FUNC(dev->devfn)].count;
 274        *supported = link->fn_desired_actags[PCI_FUNC(dev->devfn)];
 275
 276        mutex_unlock(&links_list_lock);
 277        return 0;
 278}
 279EXPORT_SYMBOL_GPL(pnv_ocxl_get_actag);
 280
 281int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count)
 282{
 283        struct npu_link *link;
 284        int i, rc = -EINVAL;
 285
 286        /*
 287         * The number of PASIDs (process address space ID) which can
 288         * be used by a function depends on how many functions exist
 289         * on the device. The NPU needs to be configured to know how
 290         * many bits are available to PASIDs and how many are to be
 291         * used by the function BDF indentifier.
 292         *
 293         * We only support one AFU-carrying function for now.
 294         */
 295        mutex_lock(&links_list_lock);
 296
 297        link = find_link(dev);
 298        if (!link) {
 299                dev_err(&dev->dev, "actag information not found\n");
 300                mutex_unlock(&links_list_lock);
 301                return -ENODEV;
 302        }
 303
 304        for (i = 0; i < 8; i++)
 305                if (link->fn_desired_actags[i] && (i == PCI_FUNC(dev->devfn))) {
 306                        *count = PNV_OCXL_PASID_MAX;
 307                        rc = 0;
 308                        break;
 309                }
 310
 311        mutex_unlock(&links_list_lock);
 312        dev_dbg(&dev->dev, "%d PASIDs available for function\n",
 313                rc ? 0 : *count);
 314        return rc;
 315}
 316EXPORT_SYMBOL_GPL(pnv_ocxl_get_pasid_count);
 317
 318static void set_templ_rate(unsigned int templ, unsigned int rate, char *buf)
 319{
 320        int shift, idx;
 321
 322        WARN_ON(templ > PNV_OCXL_TL_MAX_TEMPLATE);
 323        idx = (PNV_OCXL_TL_MAX_TEMPLATE - templ) / 2;
 324        shift = 4 * (1 - ((PNV_OCXL_TL_MAX_TEMPLATE - templ) % 2));
 325        buf[idx] |= rate << shift;
 326}
 327
 328int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap,
 329                        char *rate_buf, int rate_buf_size)
 330{
 331        if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE)
 332                return -EINVAL;
 333        /*
 334         * The TL capabilities are a characteristic of the NPU, so
 335         * we go with hard-coded values.
 336         *
 337         * The receiving rate of each template is encoded on 4 bits.
 338         *
 339         * On P9:
 340         * - templates 0 -> 3 are supported
 341         * - templates 0, 1 and 3 have a 0 receiving rate
 342         * - template 2 has receiving rate of 1 (extra cycle)
 343         */
 344        memset(rate_buf, 0, rate_buf_size);
 345        set_templ_rate(2, 1, rate_buf);
 346        *cap = PNV_OCXL_TL_P9_RECV_CAP;
 347        return 0;
 348}
 349EXPORT_SYMBOL_GPL(pnv_ocxl_get_tl_cap);
 350
 351int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap,
 352                        uint64_t rate_buf_phys, int rate_buf_size)
 353{
 354        struct pci_controller *hose = pci_bus_to_host(dev->bus);
 355        struct pnv_phb *phb = hose->private_data;
 356        int rc;
 357
 358        if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE)
 359                return -EINVAL;
 360
 361        rc = opal_npu_tl_set(phb->opal_id, dev->devfn, cap,
 362                        rate_buf_phys, rate_buf_size);
 363        if (rc) {
 364                dev_err(&dev->dev, "Can't configure host TL: %d\n", rc);
 365                return -EINVAL;
 366        }
 367        return 0;
 368}
 369EXPORT_SYMBOL_GPL(pnv_ocxl_set_tl_conf);
 370
 371int pnv_ocxl_get_xsl_irq(struct pci_dev *dev, int *hwirq)
 372{
 373        int rc;
 374
 375        rc = of_property_read_u32(dev->dev.of_node, "ibm,opal-xsl-irq", hwirq);
 376        if (rc) {
 377                dev_err(&dev->dev,
 378                        "Can't get translation interrupt for device\n");
 379                return rc;
 380        }
 381        return 0;
 382}
 383EXPORT_SYMBOL_GPL(pnv_ocxl_get_xsl_irq);
 384
 385void pnv_ocxl_unmap_xsl_regs(void __iomem *dsisr, void __iomem *dar,
 386                        void __iomem *tfc, void __iomem *pe_handle)
 387{
 388        iounmap(dsisr);
 389        iounmap(dar);
 390        iounmap(tfc);
 391        iounmap(pe_handle);
 392}
 393EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_xsl_regs);
 394
 395int pnv_ocxl_map_xsl_regs(struct pci_dev *dev, void __iomem **dsisr,
 396                        void __iomem **dar, void __iomem **tfc,
 397                        void __iomem **pe_handle)
 398{
 399        u64 reg;
 400        int i, j, rc = 0;
 401        void __iomem *regs[4];
 402
 403        /*
 404         * opal stores the mmio addresses of the DSISR, DAR, TFC and
 405         * PE_HANDLE registers in a device tree property, in that
 406         * order
 407         */
 408        for (i = 0; i < 4; i++) {
 409                rc = of_property_read_u64_index(dev->dev.of_node,
 410                                                "ibm,opal-xsl-mmio", i, &reg);
 411                if (rc)
 412                        break;
 413                regs[i] = ioremap(reg, 8);
 414                if (!regs[i]) {
 415                        rc = -EINVAL;
 416                        break;
 417                }
 418        }
 419        if (rc) {
 420                dev_err(&dev->dev, "Can't map translation mmio registers\n");
 421                for (j = i - 1; j >= 0; j--)
 422                        iounmap(regs[j]);
 423        } else {
 424                *dsisr = regs[0];
 425                *dar = regs[1];
 426                *tfc = regs[2];
 427                *pe_handle = regs[3];
 428        }
 429        return rc;
 430}
 431EXPORT_SYMBOL_GPL(pnv_ocxl_map_xsl_regs);
 432
 433struct spa_data {
 434        u64 phb_opal_id;
 435        u32 bdfn;
 436};
 437
 438int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask,
 439                void **platform_data)
 440{
 441        struct pci_controller *hose = pci_bus_to_host(dev->bus);
 442        struct pnv_phb *phb = hose->private_data;
 443        struct spa_data *data;
 444        u32 bdfn;
 445        int rc;
 446
 447        data = kzalloc(sizeof(*data), GFP_KERNEL);
 448        if (!data)
 449                return -ENOMEM;
 450
 451        bdfn = (dev->bus->number << 8) | dev->devfn;
 452        rc = opal_npu_spa_setup(phb->opal_id, bdfn, virt_to_phys(spa_mem),
 453                                PE_mask);
 454        if (rc) {
 455                dev_err(&dev->dev, "Can't setup Shared Process Area: %d\n", rc);
 456                kfree(data);
 457                return rc;
 458        }
 459        data->phb_opal_id = phb->opal_id;
 460        data->bdfn = bdfn;
 461        *platform_data = (void *) data;
 462        return 0;
 463}
 464EXPORT_SYMBOL_GPL(pnv_ocxl_spa_setup);
 465
 466void pnv_ocxl_spa_release(void *platform_data)
 467{
 468        struct spa_data *data = (struct spa_data *) platform_data;
 469        int rc;
 470
 471        rc = opal_npu_spa_setup(data->phb_opal_id, data->bdfn, 0, 0);
 472        WARN_ON(rc);
 473        kfree(data);
 474}
 475EXPORT_SYMBOL_GPL(pnv_ocxl_spa_release);
 476
 477int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle)
 478{
 479        struct spa_data *data = (struct spa_data *) platform_data;
 480        int rc;
 481
 482        rc = opal_npu_spa_clear_cache(data->phb_opal_id, data->bdfn, pe_handle);
 483        return rc;
 484}
 485EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe_from_cache);
 486
 487int pnv_ocxl_map_lpar(struct pci_dev *dev, uint64_t lparid,
 488                      uint64_t lpcr, void __iomem **arva)
 489{
 490        struct pci_controller *hose = pci_bus_to_host(dev->bus);
 491        struct pnv_phb *phb = hose->private_data;
 492        u64 mmio_atsd;
 493        int rc;
 494
 495        /* ATSD physical address.
 496         * ATSD LAUNCH register: write access initiates a shoot down to
 497         * initiate the TLB Invalidate command.
 498         */
 499        rc = of_property_read_u64_index(hose->dn, "ibm,mmio-atsd",
 500                                        0, &mmio_atsd);
 501        if (rc) {
 502                dev_info(&dev->dev, "No available ATSD found\n");
 503                return rc;
 504        }
 505
 506        /* Assign a register set to a Logical Partition and MMIO ATSD
 507         * LPARID register to the required value.
 508         */
 509        rc = opal_npu_map_lpar(phb->opal_id, pci_dev_id(dev),
 510                               lparid, lpcr);
 511        if (rc) {
 512                dev_err(&dev->dev, "Error mapping device to LPAR: %d\n", rc);
 513                return rc;
 514        }
 515
 516        *arva = ioremap(mmio_atsd, 24);
 517        if (!(*arva)) {
 518                dev_warn(&dev->dev, "ioremap failed - mmio_atsd: %#llx\n", mmio_atsd);
 519                rc = -ENOMEM;
 520        }
 521
 522        return rc;
 523}
 524EXPORT_SYMBOL_GPL(pnv_ocxl_map_lpar);
 525
 526void pnv_ocxl_unmap_lpar(void __iomem *arva)
 527{
 528        iounmap(arva);
 529}
 530EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_lpar);
 531
 532void pnv_ocxl_tlb_invalidate(void __iomem *arva,
 533                             unsigned long pid,
 534                             unsigned long addr,
 535                             unsigned long page_size)
 536{
 537        unsigned long timeout = jiffies + (HZ * PNV_OCXL_ATSD_TIMEOUT);
 538        u64 val = 0ull;
 539        int pend;
 540        u8 size;
 541
 542        if (!(arva))
 543                return;
 544
 545        if (addr) {
 546                /* load Abbreviated Virtual Address register with
 547                 * the necessary value
 548                 */
 549                val |= FIELD_PREP(PNV_OCXL_ATSD_AVA_AVA, addr >> (63-51));
 550                out_be64(arva + PNV_OCXL_ATSD_AVA, val);
 551        }
 552
 553        /* Write access initiates a shoot down to initiate the
 554         * TLB Invalidate command
 555         */
 556        val = PNV_OCXL_ATSD_LNCH_R;
 557        val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_RIC, 0b10);
 558        if (addr)
 559                val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b00);
 560        else {
 561                val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b01);
 562                val |= PNV_OCXL_ATSD_LNCH_OCAPI_SINGLETON;
 563        }
 564        val |= PNV_OCXL_ATSD_LNCH_PRS;
 565        /* Actual Page Size to be invalidated
 566         * 000 4KB
 567         * 101 64KB
 568         * 001 2MB
 569         * 010 1GB
 570         */
 571        size = 0b101;
 572        if (page_size == 0x1000)
 573                size = 0b000;
 574        if (page_size == 0x200000)
 575                size = 0b001;
 576        if (page_size == 0x40000000)
 577                size = 0b010;
 578        val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_AP, size);
 579        val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_PID, pid);
 580        out_be64(arva + PNV_OCXL_ATSD_LNCH, val);
 581
 582        /* Poll the ATSD status register to determine when the
 583         * TLB Invalidate has been completed.
 584         */
 585        val = in_be64(arva + PNV_OCXL_ATSD_STAT);
 586        pend = val >> 63;
 587
 588        while (pend) {
 589                if (time_after_eq(jiffies, timeout)) {
 590                        pr_err("%s - Timeout while reading XTS MMIO ATSD status register (val=%#llx, pidr=0x%lx)\n",
 591                               __func__, val, pid);
 592                        return;
 593                }
 594                cpu_relax();
 595                val = in_be64(arva + PNV_OCXL_ATSD_STAT);
 596                pend = val >> 63;
 597        }
 598}
 599EXPORT_SYMBOL_GPL(pnv_ocxl_tlb_invalidate);
 600