linux/arch/powerpc/platforms/pseries/vas.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * Copyright 2020-21 IBM Corp.
   4 */
   5
   6#define pr_fmt(fmt) "vas: " fmt
   7
   8#include <linux/module.h>
   9#include <linux/kernel.h>
  10#include <linux/export.h>
  11#include <linux/types.h>
  12#include <linux/delay.h>
  13#include <linux/slab.h>
  14#include <linux/interrupt.h>
  15#include <linux/irqdomain.h>
  16#include <asm/machdep.h>
  17#include <asm/hvcall.h>
  18#include <asm/plpar_wrappers.h>
  19#include <asm/vas.h>
  20#include "vas.h"
  21
  22#define VAS_INVALID_WIN_ADDRESS 0xFFFFFFFFFFFFFFFFul
  23#define VAS_DEFAULT_DOMAIN_ID   0xFFFFFFFFFFFFFFFFul
  24/* The hypervisor allows one credit per window right now */
  25#define DEF_WIN_CREDS           1
  26
  27static struct vas_all_caps caps_all;
  28static bool copypaste_feat;
  29
  30static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE];
  31static DEFINE_MUTEX(vas_pseries_mutex);
  32
  33static long hcall_return_busy_check(long rc)
  34{
  35        /* Check if we are stalled for some time */
  36        if (H_IS_LONG_BUSY(rc)) {
  37                msleep(get_longbusy_msecs(rc));
  38                rc = H_BUSY;
  39        } else if (rc == H_BUSY) {
  40                cond_resched();
  41        }
  42
  43        return rc;
  44}
  45
  46/*
  47 * Allocate VAS window hcall
  48 */
  49static int h_allocate_vas_window(struct pseries_vas_window *win, u64 *domain,
  50                                     u8 wintype, u16 credits)
  51{
  52        long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
  53        long rc;
  54
  55        do {
  56                rc = plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, wintype,
  57                                  credits, domain[0], domain[1], domain[2],
  58                                  domain[3], domain[4], domain[5]);
  59
  60                rc = hcall_return_busy_check(rc);
  61        } while (rc == H_BUSY);
  62
  63        if (rc == H_SUCCESS) {
  64                if (win->win_addr == VAS_INVALID_WIN_ADDRESS) {
  65                        pr_err("H_ALLOCATE_VAS_WINDOW: COPY/PASTE is not supported\n");
  66                        return -ENOTSUPP;
  67                }
  68                win->vas_win.winid = retbuf[0];
  69                win->win_addr = retbuf[1];
  70                win->complete_irq = retbuf[2];
  71                win->fault_irq = retbuf[3];
  72                return 0;
  73        }
  74
  75        pr_err("H_ALLOCATE_VAS_WINDOW error: %ld, wintype: %u, credits: %u\n",
  76                rc, wintype, credits);
  77
  78        return -EIO;
  79}
  80
  81/*
  82 * Deallocate VAS window hcall.
  83 */
  84static int h_deallocate_vas_window(u64 winid)
  85{
  86        long rc;
  87
  88        do {
  89                rc = plpar_hcall_norets(H_DEALLOCATE_VAS_WINDOW, winid);
  90
  91                rc = hcall_return_busy_check(rc);
  92        } while (rc == H_BUSY);
  93
  94        if (rc == H_SUCCESS)
  95                return 0;
  96
  97        pr_err("H_DEALLOCATE_VAS_WINDOW error: %ld, winid: %llu\n",
  98                rc, winid);
  99        return -EIO;
 100}
 101
 102/*
 103 * Modify VAS window.
 104 * After the window is opened with allocate window hcall, configure it
 105 * with flags and LPAR PID before using.
 106 */
 107static int h_modify_vas_window(struct pseries_vas_window *win)
 108{
 109        long rc;
 110        u32 lpid = mfspr(SPRN_PID);
 111
 112        /*
 113         * AMR value is not supported in Linux VAS implementation.
 114         * The hypervisor ignores it if 0 is passed.
 115         */
 116        do {
 117                rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW,
 118                                        win->vas_win.winid, lpid, 0,
 119                                        VAS_MOD_WIN_FLAGS, 0);
 120
 121                rc = hcall_return_busy_check(rc);
 122        } while (rc == H_BUSY);
 123
 124        if (rc == H_SUCCESS)
 125                return 0;
 126
 127        pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u lpid %u\n",
 128                        rc, win->vas_win.winid, lpid);
 129        return -EIO;
 130}
 131
 132/*
 133 * This hcall is used to determine the capabilities from the hypervisor.
 134 * @hcall: H_QUERY_VAS_CAPABILITIES or H_QUERY_NX_CAPABILITIES
 135 * @query_type: If 0 is passed, the hypervisor returns the overall
 136 *              capabilities which provides all feature(s) that are
 137 *              available. Then query the hypervisor to get the
 138 *              corresponding capabilities for the specific feature.
 139 *              Example: H_QUERY_VAS_CAPABILITIES provides VAS GZIP QoS
 140 *                      and VAS GZIP Default capabilities.
 141 *                      H_QUERY_NX_CAPABILITIES provides NX GZIP
 142 *                      capabilities.
 143 * @result: Return buffer to save capabilities.
 144 */
 145int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result)
 146{
 147        long rc;
 148
 149        rc = plpar_hcall_norets(hcall, query_type, result);
 150
 151        if (rc == H_SUCCESS)
 152                return 0;
 153
 154        pr_err("HCALL(%llx) error %ld, query_type %u, result buffer 0x%llx\n",
 155                        hcall, rc, query_type, result);
 156        return -EIO;
 157}
 158EXPORT_SYMBOL_GPL(h_query_vas_capabilities);
 159
 160/*
 161 * hcall to get fault CRB from the hypervisor.
 162 */
 163static int h_get_nx_fault(u32 winid, u64 buffer)
 164{
 165        long rc;
 166
 167        rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer);
 168
 169        if (rc == H_SUCCESS)
 170                return 0;
 171
 172        pr_err("H_GET_NX_FAULT error: %ld, winid %u, buffer 0x%llx\n",
 173                rc, winid, buffer);
 174        return -EIO;
 175
 176}
 177
 178/*
 179 * Handle the fault interrupt.
 180 * When the fault interrupt is received for each window, query the
 181 * hypervisor to get the fault CRB on the specific fault. Then
 182 * process the CRB by updating CSB or send signal if the user space
 183 * CSB is invalid.
 184 * Note: The hypervisor forwards an interrupt for each fault request.
 185 *      So one fault CRB to process for each H_GET_NX_FAULT hcall.
 186 */
 187static irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data)
 188{
 189        struct pseries_vas_window *txwin = data;
 190        struct coprocessor_request_block crb;
 191        struct vas_user_win_ref *tsk_ref;
 192        int rc;
 193
 194        rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb));
 195        if (!rc) {
 196                tsk_ref = &txwin->vas_win.task_ref;
 197                vas_dump_crb(&crb);
 198                vas_update_csb(&crb, tsk_ref);
 199        }
 200
 201        return IRQ_HANDLED;
 202}
 203
 204/*
 205 * Allocate window and setup IRQ mapping.
 206 */
 207static int allocate_setup_window(struct pseries_vas_window *txwin,
 208                                 u64 *domain, u8 wintype)
 209{
 210        int rc;
 211
 212        rc = h_allocate_vas_window(txwin, domain, wintype, DEF_WIN_CREDS);
 213        if (rc)
 214                return rc;
 215        /*
 216         * On PowerVM, the hypervisor setup and forwards the fault
 217         * interrupt per window. So the IRQ setup and fault handling
 218         * will be done for each open window separately.
 219         */
 220        txwin->fault_virq = irq_create_mapping(NULL, txwin->fault_irq);
 221        if (!txwin->fault_virq) {
 222                pr_err("Failed irq mapping %d\n", txwin->fault_irq);
 223                rc = -EINVAL;
 224                goto out_win;
 225        }
 226
 227        txwin->name = kasprintf(GFP_KERNEL, "vas-win-%d",
 228                                txwin->vas_win.winid);
 229        if (!txwin->name) {
 230                rc = -ENOMEM;
 231                goto out_irq;
 232        }
 233
 234        rc = request_threaded_irq(txwin->fault_virq, NULL,
 235                                  pseries_vas_fault_thread_fn, IRQF_ONESHOT,
 236                                  txwin->name, txwin);
 237        if (rc) {
 238                pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n",
 239                       txwin->vas_win.winid, txwin->fault_virq, rc);
 240                goto out_free;
 241        }
 242
 243        txwin->vas_win.wcreds_max = DEF_WIN_CREDS;
 244
 245        return 0;
 246out_free:
 247        kfree(txwin->name);
 248out_irq:
 249        irq_dispose_mapping(txwin->fault_virq);
 250out_win:
 251        h_deallocate_vas_window(txwin->vas_win.winid);
 252        return rc;
 253}
 254
 255static inline void free_irq_setup(struct pseries_vas_window *txwin)
 256{
 257        free_irq(txwin->fault_virq, txwin);
 258        kfree(txwin->name);
 259        irq_dispose_mapping(txwin->fault_virq);
 260}
 261
 262static struct vas_window *vas_allocate_window(int vas_id, u64 flags,
 263                                              enum vas_cop_type cop_type)
 264{
 265        long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID};
 266        struct vas_cop_feat_caps *cop_feat_caps;
 267        struct vas_caps *caps;
 268        struct pseries_vas_window *txwin;
 269        int rc;
 270
 271        txwin = kzalloc(sizeof(*txwin), GFP_KERNEL);
 272        if (!txwin)
 273                return ERR_PTR(-ENOMEM);
 274
 275        /*
 276         * A VAS window can have many credits which means that many
 277         * requests can be issued simultaneously. But the hypervisor
 278         * restricts one credit per window.
 279         * The hypervisor introduces 2 different types of credits:
 280         * Default credit type (Uses normal priority FIFO):
 281         *      A limited number of credits are assigned to partitions
 282         *      based on processor entitlement. But these credits may be
 283         *      over-committed on a system depends on whether the CPUs
 284         *      are in shared or dedicated modes - that is, more requests
 285         *      may be issued across the system than NX can service at
 286         *      once which can result in paste command failure (RMA_busy).
 287         *      Then the process has to resend requests or fall-back to
 288         *      SW compression.
 289         * Quality of Service (QoS) credit type (Uses high priority FIFO):
 290         *      To avoid NX HW contention, the system admins can assign
 291         *      QoS credits for each LPAR so that this partition is
 292         *      guaranteed access to NX resources. These credits are
 293         *      assigned to partitions via the HMC.
 294         *      Refer PAPR for more information.
 295         *
 296         * Allocate window with QoS credits if user requested. Otherwise
 297         * default credits are used.
 298         */
 299        if (flags & VAS_TX_WIN_FLAG_QOS_CREDIT)
 300                caps = &vascaps[VAS_GZIP_QOS_FEAT_TYPE];
 301        else
 302                caps = &vascaps[VAS_GZIP_DEF_FEAT_TYPE];
 303
 304        cop_feat_caps = &caps->caps;
 305
 306        if (atomic_inc_return(&cop_feat_caps->used_lpar_creds) >
 307                        atomic_read(&cop_feat_caps->target_lpar_creds)) {
 308                pr_err("Credits are not available to allocate window\n");
 309                rc = -EINVAL;
 310                goto out;
 311        }
 312
 313        if (vas_id == -1) {
 314                /*
 315                 * The user space is requesting to allocate a window on
 316                 * a VAS instance where the process is executing.
 317                 * On PowerVM, domain values are passed to the hypervisor
 318                 * to select VAS instance. Useful if the process is
 319                 * affinity to NUMA node.
 320                 * The hypervisor selects VAS instance if
 321                 * VAS_DEFAULT_DOMAIN_ID (-1) is passed for domain values.
 322                 * The h_allocate_vas_window hcall is defined to take a
 323                 * domain values as specified by h_home_node_associativity,
 324                 * So no unpacking needs to be done.
 325                 */
 326                rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain,
 327                                  VPHN_FLAG_VCPU, smp_processor_id());
 328                if (rc != H_SUCCESS) {
 329                        pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc);
 330                        goto out;
 331                }
 332        }
 333
 334        /*
 335         * Allocate / Deallocate window hcalls and setup / free IRQs
 336         * have to be protected with mutex.
 337         * Open VAS window: Allocate window hcall and setup IRQ
 338         * Close VAS window: Deallocate window hcall and free IRQ
 339         *      The hypervisor waits until all NX requests are
 340         *      completed before closing the window. So expects OS
 341         *      to handle NX faults, means IRQ can be freed only
 342         *      after the deallocate window hcall is returned.
 343         * So once the window is closed with deallocate hcall before
 344         * the IRQ is freed, it can be assigned to new allocate
 345         * hcall with the same fault IRQ by the hypervisor. It can
 346         * result in setup IRQ fail for the new window since the
 347         * same fault IRQ is not freed by the OS before.
 348         */
 349        mutex_lock(&vas_pseries_mutex);
 350        rc = allocate_setup_window(txwin, (u64 *)&domain[0],
 351                                   cop_feat_caps->win_type);
 352        mutex_unlock(&vas_pseries_mutex);
 353        if (rc)
 354                goto out;
 355
 356        /*
 357         * Modify window and it is ready to use.
 358         */
 359        rc = h_modify_vas_window(txwin);
 360        if (!rc)
 361                rc = get_vas_user_win_ref(&txwin->vas_win.task_ref);
 362        if (rc)
 363                goto out_free;
 364
 365        vas_user_win_add_mm_context(&txwin->vas_win.task_ref);
 366        txwin->win_type = cop_feat_caps->win_type;
 367        mutex_lock(&vas_pseries_mutex);
 368        list_add(&txwin->win_list, &caps->list);
 369        mutex_unlock(&vas_pseries_mutex);
 370
 371        return &txwin->vas_win;
 372
 373out_free:
 374        /*
 375         * Window is not operational. Free IRQ before closing
 376         * window so that do not have to hold mutex.
 377         */
 378        free_irq_setup(txwin);
 379        h_deallocate_vas_window(txwin->vas_win.winid);
 380out:
 381        atomic_dec(&cop_feat_caps->used_lpar_creds);
 382        kfree(txwin);
 383        return ERR_PTR(rc);
 384}
 385
 386static u64 vas_paste_address(struct vas_window *vwin)
 387{
 388        struct pseries_vas_window *win;
 389
 390        win = container_of(vwin, struct pseries_vas_window, vas_win);
 391        return win->win_addr;
 392}
 393
 394static int deallocate_free_window(struct pseries_vas_window *win)
 395{
 396        int rc = 0;
 397
 398        /*
 399         * The hypervisor waits for all requests including faults
 400         * are processed before closing the window - Means all
 401         * credits have to be returned. In the case of fault
 402         * request, a credit is returned after OS issues
 403         * H_GET_NX_FAULT hcall.
 404         * So free IRQ after executing H_DEALLOCATE_VAS_WINDOW
 405         * hcall.
 406         */
 407        rc = h_deallocate_vas_window(win->vas_win.winid);
 408        if (!rc)
 409                free_irq_setup(win);
 410
 411        return rc;
 412}
 413
 414static int vas_deallocate_window(struct vas_window *vwin)
 415{
 416        struct pseries_vas_window *win;
 417        struct vas_cop_feat_caps *caps;
 418        int rc = 0;
 419
 420        if (!vwin)
 421                return -EINVAL;
 422
 423        win = container_of(vwin, struct pseries_vas_window, vas_win);
 424
 425        /* Should not happen */
 426        if (win->win_type >= VAS_MAX_FEAT_TYPE) {
 427                pr_err("Window (%u): Invalid window type %u\n",
 428                                vwin->winid, win->win_type);
 429                return -EINVAL;
 430        }
 431
 432        caps = &vascaps[win->win_type].caps;
 433        mutex_lock(&vas_pseries_mutex);
 434        rc = deallocate_free_window(win);
 435        if (rc) {
 436                mutex_unlock(&vas_pseries_mutex);
 437                return rc;
 438        }
 439
 440        list_del(&win->win_list);
 441        atomic_dec(&caps->used_lpar_creds);
 442        mutex_unlock(&vas_pseries_mutex);
 443
 444        put_vas_user_win_ref(&vwin->task_ref);
 445        mm_context_remove_vas_window(vwin->task_ref.mm);
 446
 447        kfree(win);
 448        return 0;
 449}
 450
 451static const struct vas_user_win_ops vops_pseries = {
 452        .open_win       = vas_allocate_window,  /* Open and configure window */
 453        .paste_addr     = vas_paste_address,    /* To do copy/paste */
 454        .close_win      = vas_deallocate_window, /* Close window */
 455};
 456
 457/*
 458 * Supporting only nx-gzip coprocessor type now, but this API code
 459 * extended to other coprocessor types later.
 460 */
 461int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type,
 462                             const char *name)
 463{
 464        int rc;
 465
 466        if (!copypaste_feat)
 467                return -ENOTSUPP;
 468
 469        rc = vas_register_coproc_api(mod, cop_type, name, &vops_pseries);
 470
 471        return rc;
 472}
 473EXPORT_SYMBOL_GPL(vas_register_api_pseries);
 474
 475void vas_unregister_api_pseries(void)
 476{
 477        vas_unregister_coproc_api();
 478}
 479EXPORT_SYMBOL_GPL(vas_unregister_api_pseries);
 480
 481/*
 482 * Get the specific capabilities based on the feature type.
 483 * Right now supports GZIP default and GZIP QoS capabilities.
 484 */
 485static int get_vas_capabilities(u8 feat, enum vas_cop_feat_type type,
 486                                struct hv_vas_cop_feat_caps *hv_caps)
 487{
 488        struct vas_cop_feat_caps *caps;
 489        struct vas_caps *vcaps;
 490        int rc = 0;
 491
 492        vcaps = &vascaps[type];
 493        memset(vcaps, 0, sizeof(*vcaps));
 494        INIT_LIST_HEAD(&vcaps->list);
 495
 496        caps = &vcaps->caps;
 497
 498        rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat,
 499                                          (u64)virt_to_phys(hv_caps));
 500        if (rc)
 501                return rc;
 502
 503        caps->user_mode = hv_caps->user_mode;
 504        if (!(caps->user_mode & VAS_COPY_PASTE_USER_MODE)) {
 505                pr_err("User space COPY/PASTE is not supported\n");
 506                return -ENOTSUPP;
 507        }
 508
 509        caps->descriptor = be64_to_cpu(hv_caps->descriptor);
 510        caps->win_type = hv_caps->win_type;
 511        if (caps->win_type >= VAS_MAX_FEAT_TYPE) {
 512                pr_err("Unsupported window type %u\n", caps->win_type);
 513                return -EINVAL;
 514        }
 515        caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds);
 516        caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds);
 517        atomic_set(&caps->target_lpar_creds,
 518                   be16_to_cpu(hv_caps->target_lpar_creds));
 519        if (feat == VAS_GZIP_DEF_FEAT) {
 520                caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds);
 521
 522                if (caps->max_win_creds < DEF_WIN_CREDS) {
 523                        pr_err("Window creds(%u) > max allowed window creds(%u)\n",
 524                               DEF_WIN_CREDS, caps->max_win_creds);
 525                        return -EINVAL;
 526                }
 527        }
 528
 529        copypaste_feat = true;
 530
 531        return 0;
 532}
 533
 534static int __init pseries_vas_init(void)
 535{
 536        struct hv_vas_cop_feat_caps *hv_cop_caps;
 537        struct hv_vas_all_caps *hv_caps;
 538        int rc;
 539
 540        /*
 541         * Linux supports user space COPY/PASTE only with Radix
 542         */
 543        if (!radix_enabled()) {
 544                pr_err("API is supported only with radix page tables\n");
 545                return -ENOTSUPP;
 546        }
 547
 548        hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL);
 549        if (!hv_caps)
 550                return -ENOMEM;
 551        /*
 552         * Get VAS overall capabilities by passing 0 to feature type.
 553         */
 554        rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 0,
 555                                          (u64)virt_to_phys(hv_caps));
 556        if (rc)
 557                goto out;
 558
 559        caps_all.descriptor = be64_to_cpu(hv_caps->descriptor);
 560        caps_all.feat_type = be64_to_cpu(hv_caps->feat_type);
 561
 562        hv_cop_caps = kmalloc(sizeof(*hv_cop_caps), GFP_KERNEL);
 563        if (!hv_cop_caps) {
 564                rc = -ENOMEM;
 565                goto out;
 566        }
 567        /*
 568         * QOS capabilities available
 569         */
 570        if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) {
 571                rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT,
 572                                          VAS_GZIP_QOS_FEAT_TYPE, hv_cop_caps);
 573
 574                if (rc)
 575                        goto out_cop;
 576        }
 577        /*
 578         * Default capabilities available
 579         */
 580        if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT) {
 581                rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT,
 582                                          VAS_GZIP_DEF_FEAT_TYPE, hv_cop_caps);
 583                if (rc)
 584                        goto out_cop;
 585        }
 586
 587        pr_info("GZIP feature is available\n");
 588
 589out_cop:
 590        kfree(hv_cop_caps);
 591out:
 592        kfree(hv_caps);
 593        return rc;
 594}
 595machine_device_initcall(pseries, pseries_vas_init);
 596