linux/arch/powerpc/platforms/powernv/ocxl.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0+
   2// Copyright 2017 IBM Corp.
   3#include <asm/pnv-ocxl.h>
   4#include <asm/opal.h>
   5#include <asm/xive.h>
   6#include <misc/ocxl-config.h>
   7#include "pci.h"
   8
   9#define PNV_OCXL_TL_P9_RECV_CAP         0x000000000000000Full
  10#define PNV_OCXL_ACTAG_MAX              64
  11/* PASIDs are 20-bit, but on P9, NPU can only handle 15 bits */
  12#define PNV_OCXL_PASID_BITS             15
  13#define PNV_OCXL_PASID_MAX              ((1 << PNV_OCXL_PASID_BITS) - 1)
  14
  15#define AFU_PRESENT (1 << 31)
  16#define AFU_INDEX_MASK 0x3F000000
  17#define AFU_INDEX_SHIFT 24
  18#define ACTAG_MASK 0xFFF
  19
  20
/* A contiguous range of actags assigned to one PCI function */
struct actag_range {
        u16 start;      /* first actag of the range */
        u16 count;      /* number of actags in the range */
};

/*
 * Per-device bookkeeping, shared by all the PCI functions found at the
 * same domain/bus/slot. The arrays are indexed by PCI function number.
 */
struct npu_link {
        struct list_head list;  /* entry in links_list */
        int domain;             /* PCI domain number of the device */
        int bus;                /* PCI bus number of the device */
        int dev;                /* PCI slot number of the device */
        /* sum of the actags desired by the AFUs of each function */
        u16 fn_desired_actags[8];
        /* actag range handed to each function by assign_actags() */
        struct actag_range fn_actags[8];
        /* true once assign_actags() has filled fn_actags */
        bool assignment_done;
};
  35static struct list_head links_list = LIST_HEAD_INIT(links_list);
  36static DEFINE_MUTEX(links_list_lock);
  37
  38
  39/*
  40 * opencapi actags handling:
  41 *
  42 * When sending commands, the opencapi device references the memory
  43 * context it's targeting with an 'actag', which is really an alias
  44 * for a (BDF, pasid) combination. When it receives a command, the NPU
  45 * must do a lookup of the actag to identify the memory context. The
  46 * hardware supports a finite number of actags per link (64 for
  47 * POWER9).
  48 *
  49 * The device can carry multiple functions, and each function can have
  50 * multiple AFUs. Each AFU advertises in its config space the number
  51 * of desired actags. The host must configure in the config space of
  52 * the AFU how many actags the AFU is really allowed to use (which can
  53 * be less than what the AFU desires).
  54 *
  55 * When a PCI function is probed by the driver, it has no visibility
  56 * about the other PCI functions and how many actags they'd like,
  57 * which makes it impossible to distribute actags fairly among AFUs.
  58 *
 * Unfortunately, the only way to know how many actags a function
 * desires is by looking at the data for each AFU in the config space
 * and adding them up. Similarly, the only way to know how many actags
 * all the functions of the physical device desire is by adding the
 * previously computed function counts. Then we can match that against
 * what the hardware supports.
  65 *
 * To get a comprehensive view, we use a 'pci fixup': at the end of
 * PCI enumeration, each function counts how many actags its AFUs
 * desire and we save it in an 'npu_link' structure, shared between all
 * the PCI functions of the same device. Therefore, when the first
 * function is probed by the driver, we can get an idea of the total
 * count of desired actags for the device, and assign the actags to
 * the AFUs, by pro-rating if needed.
  73 */
  74
  75static int find_dvsec_from_pos(struct pci_dev *dev, int dvsec_id, int pos)
  76{
  77        int vsec = pos;
  78        u16 vendor, id;
  79
  80        while ((vsec = pci_find_next_ext_capability(dev, vsec,
  81                                                    OCXL_EXT_CAP_ID_DVSEC))) {
  82                pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET,
  83                                &vendor);
  84                pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id);
  85                if (vendor == PCI_VENDOR_ID_IBM && id == dvsec_id)
  86                        return vsec;
  87        }
  88        return 0;
  89}
  90
  91static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx)
  92{
  93        int vsec = 0;
  94        u8 idx;
  95
  96        while ((vsec = find_dvsec_from_pos(dev, OCXL_DVSEC_AFU_CTRL_ID,
  97                                           vsec))) {
  98                pci_read_config_byte(dev, vsec + OCXL_DVSEC_AFU_CTRL_AFU_IDX,
  99                                &idx);
 100                if (idx == afu_idx)
 101                        return vsec;
 102        }
 103        return 0;
 104}
 105
 106static int get_max_afu_index(struct pci_dev *dev, int *afu_idx)
 107{
 108        int pos;
 109        u32 val;
 110
 111        pos = find_dvsec_from_pos(dev, OCXL_DVSEC_FUNC_ID, 0);
 112        if (!pos)
 113                return -ESRCH;
 114
 115        pci_read_config_dword(dev, pos + OCXL_DVSEC_FUNC_OFF_INDEX, &val);
 116        if (val & AFU_PRESENT)
 117                *afu_idx = (val & AFU_INDEX_MASK) >> AFU_INDEX_SHIFT;
 118        else
 119                *afu_idx = -1;
 120        return 0;
 121}
 122
 123static int get_actag_count(struct pci_dev *dev, int afu_idx, int *actag)
 124{
 125        int pos;
 126        u16 actag_sup;
 127
 128        pos = find_dvsec_afu_ctrl(dev, afu_idx);
 129        if (!pos)
 130                return -ESRCH;
 131
 132        pci_read_config_word(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_SUP,
 133                        &actag_sup);
 134        *actag = actag_sup & ACTAG_MASK;
 135        return 0;
 136}
 137
 138static struct npu_link *find_link(struct pci_dev *dev)
 139{
 140        struct npu_link *link;
 141
 142        list_for_each_entry(link, &links_list, list) {
 143                /* The functions of a device all share the same link */
 144                if (link->domain == pci_domain_nr(dev->bus) &&
 145                        link->bus == dev->bus->number &&
 146                        link->dev == PCI_SLOT(dev->devfn)) {
 147                        return link;
 148                }
 149        }
 150
 151        /* link doesn't exist yet. Allocate one */
 152        link = kzalloc(sizeof(struct npu_link), GFP_KERNEL);
 153        if (!link)
 154                return NULL;
 155        link->domain = pci_domain_nr(dev->bus);
 156        link->bus = dev->bus->number;
 157        link->dev = PCI_SLOT(dev->devfn);
 158        list_add(&link->list, &links_list);
 159        return link;
 160}
 161
 162static void pnv_ocxl_fixup_actag(struct pci_dev *dev)
 163{
 164        struct pci_controller *hose = pci_bus_to_host(dev->bus);
 165        struct pnv_phb *phb = hose->private_data;
 166        struct npu_link *link;
 167        int rc, afu_idx = -1, i, actag;
 168
 169        if (!machine_is(powernv))
 170                return;
 171
 172        if (phb->type != PNV_PHB_NPU_OCAPI)
 173                return;
 174
 175        mutex_lock(&links_list_lock);
 176
 177        link = find_link(dev);
 178        if (!link) {
 179                dev_warn(&dev->dev, "couldn't update actag information\n");
 180                mutex_unlock(&links_list_lock);
 181                return;
 182        }
 183
 184        /*
 185         * Check how many actags are desired for the AFUs under that
 186         * function and add it to the count for the link
 187         */
 188        rc = get_max_afu_index(dev, &afu_idx);
 189        if (rc) {
 190                /* Most likely an invalid config space */
 191                dev_dbg(&dev->dev, "couldn't find AFU information\n");
 192                afu_idx = -1;
 193        }
 194
 195        link->fn_desired_actags[PCI_FUNC(dev->devfn)] = 0;
 196        for (i = 0; i <= afu_idx; i++) {
 197                /*
 198                 * AFU index 'holes' are allowed. So don't fail if we
 199                 * can't read the actag info for an index
 200                 */
 201                rc = get_actag_count(dev, i, &actag);
 202                if (rc)
 203                        continue;
 204                link->fn_desired_actags[PCI_FUNC(dev->devfn)] += actag;
 205        }
 206        dev_dbg(&dev->dev, "total actags for function: %d\n",
 207                link->fn_desired_actags[PCI_FUNC(dev->devfn)]);
 208
 209        mutex_unlock(&links_list_lock);
 210}
 211DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pnv_ocxl_fixup_actag);
 212
 213static u16 assign_fn_actags(u16 desired, u16 total)
 214{
 215        u16 count;
 216
 217        if (total <= PNV_OCXL_ACTAG_MAX)
 218                count = desired;
 219        else
 220                count = PNV_OCXL_ACTAG_MAX * desired / total;
 221
 222        return count;
 223}
 224
 225static void assign_actags(struct npu_link *link)
 226{
 227        u16 actag_count, range_start = 0, total_desired = 0;
 228        int i;
 229
 230        for (i = 0; i < 8; i++)
 231                total_desired += link->fn_desired_actags[i];
 232
 233        for (i = 0; i < 8; i++) {
 234                if (link->fn_desired_actags[i]) {
 235                        actag_count = assign_fn_actags(
 236                                link->fn_desired_actags[i],
 237                                total_desired);
 238                        link->fn_actags[i].start = range_start;
 239                        link->fn_actags[i].count = actag_count;
 240                        range_start += actag_count;
 241                        WARN_ON(range_start >= PNV_OCXL_ACTAG_MAX);
 242                }
 243                pr_debug("link %x:%x:%x fct %d actags: start=%d count=%d (desired=%d)\n",
 244                        link->domain, link->bus, link->dev, i,
 245                        link->fn_actags[i].start, link->fn_actags[i].count,
 246                        link->fn_desired_actags[i]);
 247        }
 248        link->assignment_done = true;
 249}
 250
 251int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled,
 252                u16 *supported)
 253{
 254        struct npu_link *link;
 255
 256        mutex_lock(&links_list_lock);
 257
 258        link = find_link(dev);
 259        if (!link) {
 260                dev_err(&dev->dev, "actag information not found\n");
 261                mutex_unlock(&links_list_lock);
 262                return -ENODEV;
 263        }
 264        /*
 265         * On p9, we only have 64 actags per link, so they must be
 266         * shared by all the functions of the same adapter. We counted
 267         * the desired actag counts during PCI enumeration, so that we
 268         * can allocate a pro-rated number of actags to each function.
 269         */
 270        if (!link->assignment_done)
 271                assign_actags(link);
 272
 273        *base      = link->fn_actags[PCI_FUNC(dev->devfn)].start;
 274        *enabled   = link->fn_actags[PCI_FUNC(dev->devfn)].count;
 275        *supported = link->fn_desired_actags[PCI_FUNC(dev->devfn)];
 276
 277        mutex_unlock(&links_list_lock);
 278        return 0;
 279}
 280EXPORT_SYMBOL_GPL(pnv_ocxl_get_actag);
 281
 282int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count)
 283{
 284        struct npu_link *link;
 285        int i, rc = -EINVAL;
 286
 287        /*
 288         * The number of PASIDs (process address space ID) which can
 289         * be used by a function depends on how many functions exist
 290         * on the device. The NPU needs to be configured to know how
 291         * many bits are available to PASIDs and how many are to be
 292         * used by the function BDF indentifier.
 293         *
 294         * We only support one AFU-carrying function for now.
 295         */
 296        mutex_lock(&links_list_lock);
 297
 298        link = find_link(dev);
 299        if (!link) {
 300                dev_err(&dev->dev, "actag information not found\n");
 301                mutex_unlock(&links_list_lock);
 302                return -ENODEV;
 303        }
 304
 305        for (i = 0; i < 8; i++)
 306                if (link->fn_desired_actags[i] && (i == PCI_FUNC(dev->devfn))) {
 307                        *count = PNV_OCXL_PASID_MAX;
 308                        rc = 0;
 309                        break;
 310                }
 311
 312        mutex_unlock(&links_list_lock);
 313        dev_dbg(&dev->dev, "%d PASIDs available for function\n",
 314                rc ? 0 : *count);
 315        return rc;
 316}
 317EXPORT_SYMBOL_GPL(pnv_ocxl_get_pasid_count);
 318
 319static void set_templ_rate(unsigned int templ, unsigned int rate, char *buf)
 320{
 321        int shift, idx;
 322
 323        WARN_ON(templ > PNV_OCXL_TL_MAX_TEMPLATE);
 324        idx = (PNV_OCXL_TL_MAX_TEMPLATE - templ) / 2;
 325        shift = 4 * (1 - ((PNV_OCXL_TL_MAX_TEMPLATE - templ) % 2));
 326        buf[idx] |= rate << shift;
 327}
 328
 329int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap,
 330                        char *rate_buf, int rate_buf_size)
 331{
 332        if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE)
 333                return -EINVAL;
 334        /*
 335         * The TL capabilities are a characteristic of the NPU, so
 336         * we go with hard-coded values.
 337         *
 338         * The receiving rate of each template is encoded on 4 bits.
 339         *
 340         * On P9:
 341         * - templates 0 -> 3 are supported
 342         * - templates 0, 1 and 3 have a 0 receiving rate
 343         * - template 2 has receiving rate of 1 (extra cycle)
 344         */
 345        memset(rate_buf, 0, rate_buf_size);
 346        set_templ_rate(2, 1, rate_buf);
 347        *cap = PNV_OCXL_TL_P9_RECV_CAP;
 348        return 0;
 349}
 350EXPORT_SYMBOL_GPL(pnv_ocxl_get_tl_cap);
 351
 352int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap,
 353                        uint64_t rate_buf_phys, int rate_buf_size)
 354{
 355        struct pci_controller *hose = pci_bus_to_host(dev->bus);
 356        struct pnv_phb *phb = hose->private_data;
 357        int rc;
 358
 359        if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE)
 360                return -EINVAL;
 361
 362        rc = opal_npu_tl_set(phb->opal_id, dev->devfn, cap,
 363                        rate_buf_phys, rate_buf_size);
 364        if (rc) {
 365                dev_err(&dev->dev, "Can't configure host TL: %d\n", rc);
 366                return -EINVAL;
 367        }
 368        return 0;
 369}
 370EXPORT_SYMBOL_GPL(pnv_ocxl_set_tl_conf);
 371
 372int pnv_ocxl_get_xsl_irq(struct pci_dev *dev, int *hwirq)
 373{
 374        int rc;
 375
 376        rc = of_property_read_u32(dev->dev.of_node, "ibm,opal-xsl-irq", hwirq);
 377        if (rc) {
 378                dev_err(&dev->dev,
 379                        "Can't get translation interrupt for device\n");
 380                return rc;
 381        }
 382        return 0;
 383}
 384EXPORT_SYMBOL_GPL(pnv_ocxl_get_xsl_irq);
 385
 386void pnv_ocxl_unmap_xsl_regs(void __iomem *dsisr, void __iomem *dar,
 387                        void __iomem *tfc, void __iomem *pe_handle)
 388{
 389        iounmap(dsisr);
 390        iounmap(dar);
 391        iounmap(tfc);
 392        iounmap(pe_handle);
 393}
 394EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_xsl_regs);
 395
 396int pnv_ocxl_map_xsl_regs(struct pci_dev *dev, void __iomem **dsisr,
 397                        void __iomem **dar, void __iomem **tfc,
 398                        void __iomem **pe_handle)
 399{
 400        u64 reg;
 401        int i, j, rc = 0;
 402        void __iomem *regs[4];
 403
 404        /*
 405         * opal stores the mmio addresses of the DSISR, DAR, TFC and
 406         * PE_HANDLE registers in a device tree property, in that
 407         * order
 408         */
 409        for (i = 0; i < 4; i++) {
 410                rc = of_property_read_u64_index(dev->dev.of_node,
 411                                                "ibm,opal-xsl-mmio", i, &reg);
 412                if (rc)
 413                        break;
 414                regs[i] = ioremap(reg, 8);
 415                if (!regs[i]) {
 416                        rc = -EINVAL;
 417                        break;
 418                }
 419        }
 420        if (rc) {
 421                dev_err(&dev->dev, "Can't map translation mmio registers\n");
 422                for (j = i - 1; j >= 0; j--)
 423                        iounmap(regs[j]);
 424        } else {
 425                *dsisr = regs[0];
 426                *dar = regs[1];
 427                *tfc = regs[2];
 428                *pe_handle = regs[3];
 429        }
 430        return rc;
 431}
 432EXPORT_SYMBOL_GPL(pnv_ocxl_map_xsl_regs);
 433
/*
 * Platform data allocated by pnv_ocxl_spa_setup() and handed back to
 * the caller as an opaque pointer. It holds what the later firmware
 * calls need to designate the device.
 */
struct spa_data {
        u64 phb_opal_id;        /* opal id of the PHB of the device */
        u32 bdfn;               /* (bus number << 8) | devfn of the device */
};
 438
 439int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask,
 440                void **platform_data)
 441{
 442        struct pci_controller *hose = pci_bus_to_host(dev->bus);
 443        struct pnv_phb *phb = hose->private_data;
 444        struct spa_data *data;
 445        u32 bdfn;
 446        int rc;
 447
 448        data = kzalloc(sizeof(*data), GFP_KERNEL);
 449        if (!data)
 450                return -ENOMEM;
 451
 452        bdfn = (dev->bus->number << 8) | dev->devfn;
 453        rc = opal_npu_spa_setup(phb->opal_id, bdfn, virt_to_phys(spa_mem),
 454                                PE_mask);
 455        if (rc) {
 456                dev_err(&dev->dev, "Can't setup Shared Process Area: %d\n", rc);
 457                kfree(data);
 458                return rc;
 459        }
 460        data->phb_opal_id = phb->opal_id;
 461        data->bdfn = bdfn;
 462        *platform_data = (void *) data;
 463        return 0;
 464}
 465EXPORT_SYMBOL_GPL(pnv_ocxl_spa_setup);
 466
 467void pnv_ocxl_spa_release(void *platform_data)
 468{
 469        struct spa_data *data = (struct spa_data *) platform_data;
 470        int rc;
 471
 472        rc = opal_npu_spa_setup(data->phb_opal_id, data->bdfn, 0, 0);
 473        WARN_ON(rc);
 474        kfree(data);
 475}
 476EXPORT_SYMBOL_GPL(pnv_ocxl_spa_release);
 477
 478int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle)
 479{
 480        struct spa_data *data = (struct spa_data *) platform_data;
 481        int rc;
 482
 483        rc = opal_npu_spa_clear_cache(data->phb_opal_id, data->bdfn, pe_handle);
 484        return rc;
 485}
 486EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe_from_cache);
 487
 488int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr)
 489{
 490        __be64 flags, trigger_page;
 491        s64 rc;
 492        u32 hwirq;
 493
 494        hwirq = xive_native_alloc_irq();
 495        if (!hwirq)
 496                return -ENOENT;
 497
 498        rc = opal_xive_get_irq_info(hwirq, &flags, NULL, &trigger_page, NULL,
 499                                NULL);
 500        if (rc || !trigger_page) {
 501                xive_native_free_irq(hwirq);
 502                return -ENOENT;
 503        }
 504        *irq = hwirq;
 505        *trigger_addr = be64_to_cpu(trigger_page);
 506        return 0;
 507
 508}
 509EXPORT_SYMBOL_GPL(pnv_ocxl_alloc_xive_irq);
 510
 511void pnv_ocxl_free_xive_irq(u32 irq)
 512{
 513        xive_native_free_irq(irq);
 514}
 515EXPORT_SYMBOL_GPL(pnv_ocxl_free_xive_irq);
 516