linux/drivers/misc/cxl/api.c
/*
 * Copyright 2014 IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <misc/cxl.h>
#include <linux/msi.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/sched/mm.h>

#include "cxl.h"

/*
 * Since we want to track memory mappings to be able to force-unmap
 * when the AFU is no longer reachable, we need an inode. For devices
 * opened through the cxl user API, this is not a problem, but a
 * userland process can also get a cxl fd through the cxl_get_fd()
 * API, which is used by the cxlflash driver.
 *
 * Therefore we implement our own simple pseudo-filesystem and inode
 * allocator. We don't use the anonymous inode, as we need the
 * meta-data associated with it (address_space) and it is shared by
 * other drivers/processes, so it could lead to cxl unmapping VMAs
 * from random processes.
 */

#define CXL_PSEUDO_FS_MAGIC     0x1697697f

static int cxl_fs_cnt;
static struct vfsmount *cxl_vfs_mount;

static const struct dentry_operations cxl_fs_dops = {
        .d_dname        = simple_dname,
};

static struct dentry *cxl_fs_mount(struct file_system_type *fs_type, int flags,
                                const char *dev_name, void *data)
{
        return mount_pseudo(fs_type, "cxl:", NULL, &cxl_fs_dops,
                        CXL_PSEUDO_FS_MAGIC);
}

static struct file_system_type cxl_fs_type = {
        .name           = "cxl",
        .owner          = THIS_MODULE,
        .mount          = cxl_fs_mount,
        .kill_sb        = kill_anon_super,
};


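/*
 * Drop the reference on the cxl pseudo filesystem taken by cxl_getfile() when
 * a context created through the kernel API had a file mapping attached to it.
 */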
void cxl_release_mapping(struct cxl_context *ctx)
{
        if (ctx->kernelapi && ctx->mapping)
                simple_release_fs(&cxl_vfs_mount, &cxl_fs_cnt);
}

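/*
 * Allocate an inode on the cxl pseudo filesystem and wrap it in a struct
 * file, so that every context gets a private address_space that can later be
 * force-unmapped.
 */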
static struct file *cxl_getfile(const char *name,
                                const struct file_operations *fops,
                                void *priv, int flags)
{
        struct qstr this;
        struct path path;
        struct file *file;
        struct inode *inode = NULL;
        int rc;

        /* strongly inspired by anon_inode_getfile() */

        if (fops->owner && !try_module_get(fops->owner))
                return ERR_PTR(-ENOENT);

        rc = simple_pin_fs(&cxl_fs_type, &cxl_vfs_mount, &cxl_fs_cnt);
        if (rc < 0) {
                pr_err("Cannot mount cxl pseudo filesystem: %d\n", rc);
                file = ERR_PTR(rc);
                goto err_module;
        }

        inode = alloc_anon_inode(cxl_vfs_mount->mnt_sb);
        if (IS_ERR(inode)) {
                file = ERR_CAST(inode);
                goto err_fs;
        }

        file = ERR_PTR(-ENOMEM);
        this.name = name;
        this.len = strlen(name);
        this.hash = 0;
        path.dentry = d_alloc_pseudo(cxl_vfs_mount->mnt_sb, &this);
        if (!path.dentry)
                goto err_inode;

        path.mnt = mntget(cxl_vfs_mount);
        d_instantiate(path.dentry, inode);

        file = alloc_file(&path, OPEN_FMODE(flags), fops);
        if (IS_ERR(file))
                goto err_dput;
        file->f_flags = flags & (O_ACCMODE | O_NONBLOCK);
        file->private_data = priv;

        return file;

err_dput:
        path_put(&path);
err_inode:
        iput(inode);
err_fs:
        simple_release_fs(&cxl_vfs_mount, &cxl_fs_cnt);
err_module:
        module_put(fops->owner);
        return file;
}

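/*
 * Allocate and initialise a (slave) context on the AFU behind the given PCI
 * function. This is the entry point for in-kernel users of the cxl API.
 */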
struct cxl_context *cxl_dev_context_init(struct pci_dev *dev)
{
        struct cxl_afu *afu;
        struct cxl_context  *ctx;
        int rc;

        afu = cxl_pci_to_afu(dev);
        if (IS_ERR(afu))
                return ERR_CAST(afu);

        ctx = cxl_context_alloc();
        if (!ctx)
                return ERR_PTR(-ENOMEM);

        ctx->kernelapi = true;

        /* Make it a slave context.  We can promote it later? */
        rc = cxl_context_init(ctx, afu, false);
        if (rc)
                goto err_ctx;

        return ctx;

err_ctx:
        kfree(ctx);
        return ERR_PTR(rc);
}
EXPORT_SYMBOL_GPL(cxl_dev_context_init);

struct cxl_context *cxl_get_context(struct pci_dev *dev)
{
        return dev->dev.archdata.cxl_ctx;
}
EXPORT_SYMBOL_GPL(cxl_get_context);

int cxl_release_context(struct cxl_context *ctx)
{
        if (ctx->status >= STARTED)
                return -EBUSY;

        cxl_context_free(ctx);

        return 0;
}
EXPORT_SYMBOL_GPL(cxl_release_context);

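/*
 * Translate an AFU interrupt number for a context into the corresponding
 * hardware irq number by walking the context's allocated irq ranges.
 * Returns 0 if no such interrupt exists.
 */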
static irq_hw_number_t cxl_find_afu_irq(struct cxl_context *ctx, int num)
{
        __u16 range;
        int r;

        for (r = 0; r < CXL_IRQ_RANGES; r++) {
                range = ctx->irqs.range[r];
                if (num < range) {
                        return ctx->irqs.offset[r] + num;
                }
                num -= range;
        }
        return 0;
}

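/*
 * Iterator used by the PHB MSI setup code: advance to the next AFU interrupt,
 * moving on to the next context on the extra_irq_contexts list once the
 * per-process interrupt limit is reached, and return its hardware irq number.
 */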
int _cxl_next_msi_hwirq(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq)
{
        if (*ctx == NULL || *afu_irq == 0) {
                *afu_irq = 1;
                *ctx = cxl_get_context(pdev);
        } else {
                (*afu_irq)++;
                if (*afu_irq > cxl_get_max_irqs_per_process(pdev)) {
                        *ctx = list_next_entry(*ctx, extra_irq_contexts);
                        *afu_irq = 1;
                }
        }
        return cxl_find_afu_irq(*ctx, *afu_irq);
}
/* Exported via cxl_base */

int cxl_set_priv(struct cxl_context *ctx, void *priv)
{
        if (!ctx)
                return -EINVAL;

        ctx->priv = priv;

        return 0;
}
EXPORT_SYMBOL_GPL(cxl_set_priv);

void *cxl_get_priv(struct cxl_context *ctx)
{
        if (!ctx)
                return ERR_PTR(-EINVAL);

        return ctx->priv;
}
EXPORT_SYMBOL_GPL(cxl_get_priv);

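/*
 * Allocate AFU interrupts for a context. A num of 0 requests the AFU's
 * default number of per-process interrupts. In a guest the PSL interrupt is
 * not multiplexed, so it is mapped here as well. If the context is already
 * started, the IVTEs are updated where the platform supports it.
 */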
int cxl_allocate_afu_irqs(struct cxl_context *ctx, int num)
{
        int res;
        irq_hw_number_t hwirq;

        if (num == 0)
                num = ctx->afu->pp_irqs;
        res = afu_allocate_irqs(ctx, num);
        if (res)
                return res;

        if (!cpu_has_feature(CPU_FTR_HVMODE)) {
                /* In a guest, the PSL interrupt is not multiplexed. It was
                 * allocated above, and we need to set its handler
                 */
                hwirq = cxl_find_afu_irq(ctx, 0);
                if (hwirq)
                        cxl_map_irq(ctx->afu->adapter, hwirq, cxl_ops->psl_interrupt, ctx, "psl");
        }

        if (ctx->status == STARTED) {
                if (cxl_ops->update_ivtes)
                        cxl_ops->update_ivtes(ctx);
                else WARN(1, "BUG: cxl_allocate_afu_irqs must be called prior to starting the context on this platform\n");
        }

        return res;
}
EXPORT_SYMBOL_GPL(cxl_allocate_afu_irqs);

void cxl_free_afu_irqs(struct cxl_context *ctx)
{
        irq_hw_number_t hwirq;
        unsigned int virq;

        if (!cpu_has_feature(CPU_FTR_HVMODE)) {
                hwirq = cxl_find_afu_irq(ctx, 0);
                if (hwirq) {
                        virq = irq_find_mapping(NULL, hwirq);
                        if (virq)
                                cxl_unmap_irq(virq, ctx);
                }
        }
        afu_irq_name_free(ctx);
        cxl_ops->release_irq_ranges(&ctx->irqs, ctx->afu->adapter);
}
EXPORT_SYMBOL_GPL(cxl_free_afu_irqs);

int cxl_map_afu_irq(struct cxl_context *ctx, int num,
                    irq_handler_t handler, void *cookie, char *name)
{
        irq_hw_number_t hwirq;

        /*
         * Find interrupt we are to register.
         */
        hwirq = cxl_find_afu_irq(ctx, num);
        if (!hwirq)
                return -ENOENT;

        return cxl_map_irq(ctx->afu->adapter, hwirq, handler, cookie, name);
}
EXPORT_SYMBOL_GPL(cxl_map_afu_irq);

void cxl_unmap_afu_irq(struct cxl_context *ctx, int num, void *cookie)
{
        irq_hw_number_t hwirq;
        unsigned int virq;

        hwirq = cxl_find_afu_irq(ctx, num);
        if (!hwirq)
                return;

        virq = irq_find_mapping(NULL, hwirq);
        if (virq)
                cxl_unmap_irq(virq, cookie);
}
EXPORT_SYMBOL_GPL(cxl_unmap_afu_irq);

/*
 * Start a context
 * Code here similar to afu_ioctl_start_work().
 */
int cxl_start_context(struct cxl_context *ctx, u64 wed,
                      struct task_struct *task)
{
        int rc = 0;
        bool kernel = true;

        pr_devel("%s: pe: %i\n", __func__, ctx->pe);

        mutex_lock(&ctx->status_mutex);
        if (ctx->status == STARTED)
                goto out; /* already started */

        /*
         * Increment the mapped context count for adapter. This also checks
         * if adapter_context_lock is taken.
         */
        rc = cxl_adapter_context_get(ctx->afu->adapter);
        if (rc)
                goto out;

        if (task) {
                ctx->pid = get_task_pid(task, PIDTYPE_PID);
                kernel = false;
                ctx->real_mode = false;

                /* acquire a reference to the task's mm */
                ctx->mm = get_task_mm(current);

                /* ensure this mm_struct can't be freed */
                cxl_context_mm_count_get(ctx);

                /* decrement the use count */
                if (ctx->mm)
                        mmput(ctx->mm);
        }

        /*
         * Increment driver use count. Enables global TLBIs for hash
         * and callbacks to handle the segment table
         */
        cxl_ctx_get();

        if ((rc = cxl_ops->attach_process(ctx, kernel, wed, 0))) {
                put_pid(ctx->pid);
                ctx->pid = NULL;
                cxl_adapter_context_put(ctx->afu->adapter);
                cxl_ctx_put();
                if (task)
                        cxl_context_mm_count_put(ctx);
                goto out;
        }

        ctx->status = STARTED;
out:
        mutex_unlock(&ctx->status_mutex);
        return rc;
}
EXPORT_SYMBOL_GPL(cxl_start_context);

int cxl_process_element(struct cxl_context *ctx)
{
        return ctx->external_pe;
}
EXPORT_SYMBOL_GPL(cxl_process_element);

/* Stop a context.  Returns 0 on success, otherwise -Errno */
int cxl_stop_context(struct cxl_context *ctx)
{
        return __detach_context(ctx);
}
EXPORT_SYMBOL_GPL(cxl_stop_context);

void cxl_set_master(struct cxl_context *ctx)
{
        ctx->master = true;
}
EXPORT_SYMBOL_GPL(cxl_set_master);

int cxl_set_translation_mode(struct cxl_context *ctx, bool real_mode)
{
        if (ctx->status == STARTED) {
                /*
                 * We could potentially update the PE and issue an update LLCMD
                 * to support this, but it doesn't seem to have a good use case
                 * since it's trivial to just create a second kernel context
                 * with different translation modes, so until someone convinces
                 * me otherwise:
                 */
                return -EBUSY;
        }

        ctx->real_mode = real_mode;
        return 0;
}
EXPORT_SYMBOL_GPL(cxl_set_translation_mode);

/* wrappers around afu_* file ops which are EXPORTED */
int cxl_fd_open(struct inode *inode, struct file *file)
{
        return afu_open(inode, file);
}
EXPORT_SYMBOL_GPL(cxl_fd_open);
int cxl_fd_release(struct inode *inode, struct file *file)
{
        return afu_release(inode, file);
}
EXPORT_SYMBOL_GPL(cxl_fd_release);
long cxl_fd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
        return afu_ioctl(file, cmd, arg);
}
EXPORT_SYMBOL_GPL(cxl_fd_ioctl);
int cxl_fd_mmap(struct file *file, struct vm_area_struct *vm)
{
        return afu_mmap(file, vm);
}
EXPORT_SYMBOL_GPL(cxl_fd_mmap);
unsigned int cxl_fd_poll(struct file *file, struct poll_table_struct *poll)
{
        return afu_poll(file, poll);
}
EXPORT_SYMBOL_GPL(cxl_fd_poll);
ssize_t cxl_fd_read(struct file *file, char __user *buf, size_t count,
                        loff_t *off)
{
        return afu_read(file, buf, count, off);
}
EXPORT_SYMBOL_GPL(cxl_fd_read);

#define PATCH_FOPS(NAME) if (!fops->NAME) fops->NAME = afu_fops.NAME

/* Get a struct file and fd for a context and attach the ops */
struct file *cxl_get_fd(struct cxl_context *ctx, struct file_operations *fops,
                        int *fd)
{
        struct file *file;
        int rc, flags, fdtmp;
        char *name = NULL;

        /* only allow one per context */
        if (ctx->mapping)
                return ERR_PTR(-EEXIST);

        flags = O_RDWR | O_CLOEXEC;

        /* This code is similar to anon_inode_getfd() */
        rc = get_unused_fd_flags(flags);
        if (rc < 0)
                return ERR_PTR(rc);
        fdtmp = rc;

        /*
         * Patch the file ops. Care must be taken to keep this reentrant-safe.
         */
        if (fops) {
                PATCH_FOPS(open);
                PATCH_FOPS(poll);
                PATCH_FOPS(read);
                PATCH_FOPS(release);
                PATCH_FOPS(unlocked_ioctl);
                PATCH_FOPS(compat_ioctl);
                PATCH_FOPS(mmap);
        } else /* use default ops */
                fops = (struct file_operations *)&afu_fops;

        name = kasprintf(GFP_KERNEL, "cxl:%d", ctx->pe);
        file = cxl_getfile(name, fops, ctx, flags);
        kfree(name);
        if (IS_ERR(file))
                goto err_fd;

        cxl_context_set_mapping(ctx, file->f_mapping);
        *fd = fdtmp;
        return file;

err_fd:
        put_unused_fd(fdtmp);
        return NULL;
}
EXPORT_SYMBOL_GPL(cxl_get_fd);

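/*
 * Illustrative sketch only, not part of the driver: a kernel user such as
 * cxlflash would typically pair cxl_get_fd() with fd_install() once nothing
 * else can fail, roughly along these lines (the surrounding function and the
 * my_fops variable are hypothetical):
 *
 *      struct cxl_context *ctx = cxl_dev_context_init(pdev);
 *      struct file *file;
 *      int fd;
 *
 *      file = cxl_get_fd(ctx, &my_fops, &fd);
 *      if (IS_ERR(file))
 *              return PTR_ERR(file);
 *      ...
 *      fd_install(fd, file);
 *      return fd;
 */
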
struct cxl_context *cxl_fops_get_context(struct file *file)
{
        return file->private_data;
}
EXPORT_SYMBOL_GPL(cxl_fops_get_context);

void cxl_set_driver_ops(struct cxl_context *ctx,
                        struct cxl_afu_driver_ops *ops)
{
        WARN_ON(!ops->fetch_event || !ops->event_delivered);
        atomic_set(&ctx->afu_driver_events, 0);
        ctx->afu_driver_ops = ops;
}
EXPORT_SYMBOL_GPL(cxl_set_driver_ops);

void cxl_context_events_pending(struct cxl_context *ctx,
                                unsigned int new_events)
{
        atomic_add(new_events, &ctx->afu_driver_events);
        wake_up_all(&ctx->wq);
}
EXPORT_SYMBOL_GPL(cxl_context_events_pending);

int cxl_start_work(struct cxl_context *ctx,
                   struct cxl_ioctl_start_work *work)
{
        int rc;

        /* code taken from afu_ioctl_start_work */
        if (!(work->flags & CXL_START_WORK_NUM_IRQS))
                work->num_interrupts = ctx->afu->pp_irqs;
        else if ((work->num_interrupts < ctx->afu->pp_irqs) ||
                 (work->num_interrupts > ctx->afu->irqs_max)) {
                return -EINVAL;
        }

        rc = afu_register_irqs(ctx, work->num_interrupts);
        if (rc)
                return rc;

        rc = cxl_start_context(ctx, work->work_element_descriptor, current);
        if (rc < 0) {
                afu_release_irqs(ctx, ctx);
                return rc;
        }

        return 0;
}
EXPORT_SYMBOL_GPL(cxl_start_work);

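/*
 * Map the problem state area of a context so the caller can access it via
 * MMIO. Only valid once the context has been started; use cxl_psa_unmap() to
 * release the mapping.
 */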
void __iomem *cxl_psa_map(struct cxl_context *ctx)
{
        if (ctx->status != STARTED)
                return NULL;

        pr_devel("%s: psn_phys: %llx size: %llx\n",
                __func__, ctx->psn_phys, ctx->psn_size);
        return ioremap(ctx->psn_phys, ctx->psn_size);
}
EXPORT_SYMBOL_GPL(cxl_psa_map);

void cxl_psa_unmap(void __iomem *addr)
{
        iounmap(addr);
}
EXPORT_SYMBOL_GPL(cxl_psa_unmap);

int cxl_afu_reset(struct cxl_context *ctx)
{
        struct cxl_afu *afu = ctx->afu;
        int rc;

        rc = cxl_ops->afu_reset(afu);
        if (rc)
                return rc;

        return cxl_ops->afu_check_and_enable(afu);
}
EXPORT_SYMBOL_GPL(cxl_afu_reset);

void cxl_perst_reloads_same_image(struct cxl_afu *afu,
                                  bool perst_reloads_same_image)
{
        afu->adapter->perst_same_image = perst_reloads_same_image;
}
EXPORT_SYMBOL_GPL(cxl_perst_reloads_same_image);

ssize_t cxl_read_adapter_vpd(struct pci_dev *dev, void *buf, size_t count)
{
        struct cxl_afu *afu = cxl_pci_to_afu(dev);
        if (IS_ERR(afu))
                return -ENODEV;

        return cxl_ops->read_adapter_vpd(afu->adapter, buf, count);
}
EXPORT_SYMBOL_GPL(cxl_read_adapter_vpd);

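/*
 * Cap the number of AFU interrupts that can be allocated to a single context.
 * Used by drivers whose hardware limits the usable AFU interrupt range (see
 * _cxl_cx4_setup_msi_irqs() below); the cap also prevents userspace from
 * raising the limit again via sysfs.
 */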
int cxl_set_max_irqs_per_process(struct pci_dev *dev, int irqs)
{
        struct cxl_afu *afu = cxl_pci_to_afu(dev);
        if (IS_ERR(afu))
                return -ENODEV;

        if (irqs > afu->adapter->user_irqs)
                return -EINVAL;

        /* Limit user_irqs to prevent the user increasing this via sysfs */
        afu->adapter->user_irqs = irqs;
        afu->irqs_max = irqs;

        return 0;
}
EXPORT_SYMBOL_GPL(cxl_set_max_irqs_per_process);

int cxl_get_max_irqs_per_process(struct pci_dev *dev)
{
        struct cxl_afu *afu = cxl_pci_to_afu(dev);
        if (IS_ERR(afu))
                return -ENODEV;

        return afu->irqs_max;
}
EXPORT_SYMBOL_GPL(cxl_get_max_irqs_per_process);

/*
 * This is a special interrupt allocation routine called from the PHB's MSI
 * setup function. When capi interrupts are allocated in this manner they must
 * still be associated with a running context, but since the MSI APIs have no
 * way to specify this we use the default context associated with the device.
 *
 * The Mellanox CX4 has a hardware limitation that restricts the maximum AFU
 * interrupt number, so in order to overcome this their driver informs us of
 * the restriction by setting the maximum interrupts per context, and we
 * allocate additional contexts as necessary so that we can keep the AFU
 * interrupt number within the supported range.
 */
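/*
 * For example (illustrative numbers only): with irqs_max set to 4 and nvec of
 * 10, the default context receives 4 interrupts, a first extra context
 * receives 4, and a second extra context receives the remaining 2. Extra
 * contexts are linked on the extra_irq_contexts list and are started with the
 * default context's WED if it is already running.
 */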
int _cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
{
        struct cxl_context *ctx, *new_ctx, *default_ctx;
        int remaining;
        int rc;

        ctx = default_ctx = cxl_get_context(pdev);
        if (WARN_ON(!default_ctx))
                return -ENODEV;

        remaining = nvec;
        while (remaining > 0) {
                rc = cxl_allocate_afu_irqs(ctx, min(remaining, ctx->afu->irqs_max));
                if (rc) {
                        pr_warn("%s: Failed to find enough free MSIs\n", pci_name(pdev));
                        return rc;
                }
                remaining -= ctx->afu->irqs_max;

                if (ctx != default_ctx && default_ctx->status == STARTED) {
                        WARN_ON(cxl_start_context(ctx,
                                be64_to_cpu(default_ctx->elem->common.wed),
                                NULL));
                }

                if (remaining > 0) {
                        new_ctx = cxl_dev_context_init(pdev);
                        if (IS_ERR(new_ctx)) {
                                pr_warn("%s: Failed to allocate enough contexts for MSIs\n", pci_name(pdev));
                                return -ENOSPC;
                        }
                        list_add(&new_ctx->extra_irq_contexts, &ctx->extra_irq_contexts);
                        ctx = new_ctx;
                }
        }

        return 0;
}
/* Exported via cxl_base */

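/*
 * Undo _cxl_cx4_setup_msi_irqs(): free the AFU interrupts of the default
 * context, then stop, free and release each extra context that was allocated
 * for the additional vectors.
 */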
void _cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev)
{
        struct cxl_context *ctx, *pos, *tmp;

        ctx = cxl_get_context(pdev);
        if (WARN_ON(!ctx))
                return;

        cxl_free_afu_irqs(ctx);
        list_for_each_entry_safe(pos, tmp, &ctx->extra_irq_contexts, extra_irq_contexts) {
                cxl_stop_context(pos);
                cxl_free_afu_irqs(pos);
                list_del(&pos->extra_irq_contexts);
                cxl_release_context(pos);
        }
}
/* Exported via cxl_base */
