linux/arch/powerpc/platforms/pseries/vas.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * Copyright 2020-21 IBM Corp.
   4 */
   5
   6#define pr_fmt(fmt) "vas: " fmt
   7
   8#include <linux/module.h>
   9#include <linux/kernel.h>
  10#include <linux/export.h>
  11#include <linux/types.h>
  12#include <linux/delay.h>
  13#include <linux/slab.h>
  14#include <linux/interrupt.h>
  15#include <linux/irqdomain.h>
  16#include <asm/machdep.h>
  17#include <asm/hvcall.h>
  18#include <asm/plpar_wrappers.h>
  19#include <asm/vas.h>
  20#include "vas.h"
  21
  22#define VAS_INVALID_WIN_ADDRESS 0xFFFFFFFFFFFFFFFFul
  23#define VAS_DEFAULT_DOMAIN_ID   0xFFFFFFFFFFFFFFFFul
  24/* The hypervisor allows one credit per window right now */
  25#define DEF_WIN_CREDS           1
  26
  27static struct vas_all_caps caps_all;
  28static bool copypaste_feat;
  29static struct hv_vas_cop_feat_caps hv_cop_caps;
  30
  31static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE];
  32static DEFINE_MUTEX(vas_pseries_mutex);
  33static bool migration_in_progress;
  34
  35static long hcall_return_busy_check(long rc)
  36{
  37        /* Check if we are stalled for some time */
  38        if (H_IS_LONG_BUSY(rc)) {
  39                msleep(get_longbusy_msecs(rc));
  40                rc = H_BUSY;
  41        } else if (rc == H_BUSY) {
  42                cond_resched();
  43        }
  44
  45        return rc;
  46}
  47
  48/*
  49 * Allocate VAS window hcall
  50 */
  51static int h_allocate_vas_window(struct pseries_vas_window *win, u64 *domain,
  52                                     u8 wintype, u16 credits)
  53{
  54        long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
  55        long rc;
  56
  57        do {
  58                rc = plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, wintype,
  59                                  credits, domain[0], domain[1], domain[2],
  60                                  domain[3], domain[4], domain[5]);
  61
  62                rc = hcall_return_busy_check(rc);
  63        } while (rc == H_BUSY);
  64
  65        if (rc == H_SUCCESS) {
  66                if (win->win_addr == VAS_INVALID_WIN_ADDRESS) {
  67                        pr_err("H_ALLOCATE_VAS_WINDOW: COPY/PASTE is not supported\n");
  68                        return -ENOTSUPP;
  69                }
  70                win->vas_win.winid = retbuf[0];
  71                win->win_addr = retbuf[1];
  72                win->complete_irq = retbuf[2];
  73                win->fault_irq = retbuf[3];
  74                return 0;
  75        }
  76
  77        pr_err("H_ALLOCATE_VAS_WINDOW error: %ld, wintype: %u, credits: %u\n",
  78                rc, wintype, credits);
  79
  80        return -EIO;
  81}
  82
  83/*
  84 * Deallocate VAS window hcall.
  85 */
  86static int h_deallocate_vas_window(u64 winid)
  87{
  88        long rc;
  89
  90        do {
  91                rc = plpar_hcall_norets(H_DEALLOCATE_VAS_WINDOW, winid);
  92
  93                rc = hcall_return_busy_check(rc);
  94        } while (rc == H_BUSY);
  95
  96        if (rc == H_SUCCESS)
  97                return 0;
  98
  99        pr_err("H_DEALLOCATE_VAS_WINDOW error: %ld, winid: %llu\n",
 100                rc, winid);
 101        return -EIO;
 102}
 103
 104/*
 105 * Modify VAS window.
 106 * After the window is opened with allocate window hcall, configure it
 107 * with flags and LPAR PID before using.
 108 */
 109static int h_modify_vas_window(struct pseries_vas_window *win)
 110{
 111        long rc;
 112
 113        /*
 114         * AMR value is not supported in Linux VAS implementation.
 115         * The hypervisor ignores it if 0 is passed.
 116         */
 117        do {
 118                rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW,
 119                                        win->vas_win.winid, win->pid, 0,
 120                                        VAS_MOD_WIN_FLAGS, 0);
 121
 122                rc = hcall_return_busy_check(rc);
 123        } while (rc == H_BUSY);
 124
 125        if (rc == H_SUCCESS)
 126                return 0;
 127
 128        pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u pid %u\n",
 129                        rc, win->vas_win.winid, win->pid);
 130        return -EIO;
 131}
 132
 133/*
 134 * This hcall is used to determine the capabilities from the hypervisor.
 135 * @hcall: H_QUERY_VAS_CAPABILITIES or H_QUERY_NX_CAPABILITIES
 136 * @query_type: If 0 is passed, the hypervisor returns the overall
 137 *              capabilities which provides all feature(s) that are
 138 *              available. Then query the hypervisor to get the
 139 *              corresponding capabilities for the specific feature.
 140 *              Example: H_QUERY_VAS_CAPABILITIES provides VAS GZIP QoS
 141 *                      and VAS GZIP Default capabilities.
 142 *                      H_QUERY_NX_CAPABILITIES provides NX GZIP
 143 *                      capabilities.
 144 * @result: Return buffer to save capabilities.
 145 */
 146int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result)
 147{
 148        long rc;
 149
 150        rc = plpar_hcall_norets(hcall, query_type, result);
 151
 152        if (rc == H_SUCCESS)
 153                return 0;
 154
 155        /* H_FUNCTION means HV does not support VAS so don't print an error */
 156        if (rc != H_FUNCTION) {
 157                pr_err("%s error %ld, query_type %u, result buffer 0x%llx\n",
 158                        (hcall == H_QUERY_VAS_CAPABILITIES) ?
 159                                "H_QUERY_VAS_CAPABILITIES" :
 160                                "H_QUERY_NX_CAPABILITIES",
 161                        rc, query_type, result);
 162        }
 163
 164        return -EIO;
 165}
 166EXPORT_SYMBOL_GPL(h_query_vas_capabilities);
 167
 168/*
 169 * hcall to get fault CRB from the hypervisor.
 170 */
 171static int h_get_nx_fault(u32 winid, u64 buffer)
 172{
 173        long rc;
 174
 175        rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer);
 176
 177        if (rc == H_SUCCESS)
 178                return 0;
 179
 180        pr_err("H_GET_NX_FAULT error: %ld, winid %u, buffer 0x%llx\n",
 181                rc, winid, buffer);
 182        return -EIO;
 183
 184}
 185
 186/*
 187 * Handle the fault interrupt.
 188 * When the fault interrupt is received for each window, query the
 189 * hypervisor to get the fault CRB on the specific fault. Then
 190 * process the CRB by updating CSB or send signal if the user space
 191 * CSB is invalid.
 192 * Note: The hypervisor forwards an interrupt for each fault request.
 193 *      So one fault CRB to process for each H_GET_NX_FAULT hcall.
 194 */
 195static irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data)
 196{
 197        struct pseries_vas_window *txwin = data;
 198        struct coprocessor_request_block crb;
 199        struct vas_user_win_ref *tsk_ref;
 200        int rc;
 201
 202        rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb));
 203        if (!rc) {
 204                tsk_ref = &txwin->vas_win.task_ref;
 205                vas_dump_crb(&crb);
 206                vas_update_csb(&crb, tsk_ref);
 207        }
 208
 209        return IRQ_HANDLED;
 210}
 211
 212/*
 213 * Allocate window and setup IRQ mapping.
 214 */
 215static int allocate_setup_window(struct pseries_vas_window *txwin,
 216                                 u64 *domain, u8 wintype)
 217{
 218        int rc;
 219
 220        rc = h_allocate_vas_window(txwin, domain, wintype, DEF_WIN_CREDS);
 221        if (rc)
 222                return rc;
 223        /*
 224         * On PowerVM, the hypervisor setup and forwards the fault
 225         * interrupt per window. So the IRQ setup and fault handling
 226         * will be done for each open window separately.
 227         */
 228        txwin->fault_virq = irq_create_mapping(NULL, txwin->fault_irq);
 229        if (!txwin->fault_virq) {
 230                pr_err("Failed irq mapping %d\n", txwin->fault_irq);
 231                rc = -EINVAL;
 232                goto out_win;
 233        }
 234
 235        txwin->name = kasprintf(GFP_KERNEL, "vas-win-%d",
 236                                txwin->vas_win.winid);
 237        if (!txwin->name) {
 238                rc = -ENOMEM;
 239                goto out_irq;
 240        }
 241
 242        rc = request_threaded_irq(txwin->fault_virq, NULL,
 243                                  pseries_vas_fault_thread_fn, IRQF_ONESHOT,
 244                                  txwin->name, txwin);
 245        if (rc) {
 246                pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n",
 247                       txwin->vas_win.winid, txwin->fault_virq, rc);
 248                goto out_free;
 249        }
 250
 251        txwin->vas_win.wcreds_max = DEF_WIN_CREDS;
 252
 253        return 0;
 254out_free:
 255        kfree(txwin->name);
 256out_irq:
 257        irq_dispose_mapping(txwin->fault_virq);
 258out_win:
 259        h_deallocate_vas_window(txwin->vas_win.winid);
 260        return rc;
 261}
 262
 263static inline void free_irq_setup(struct pseries_vas_window *txwin)
 264{
 265        free_irq(txwin->fault_virq, txwin);
 266        kfree(txwin->name);
 267        irq_dispose_mapping(txwin->fault_virq);
 268}
 269
 270static struct vas_window *vas_allocate_window(int vas_id, u64 flags,
 271                                              enum vas_cop_type cop_type)
 272{
 273        long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID};
 274        struct vas_cop_feat_caps *cop_feat_caps;
 275        struct vas_caps *caps;
 276        struct pseries_vas_window *txwin;
 277        int rc;
 278
 279        txwin = kzalloc(sizeof(*txwin), GFP_KERNEL);
 280        if (!txwin)
 281                return ERR_PTR(-ENOMEM);
 282
 283        /*
 284         * A VAS window can have many credits which means that many
 285         * requests can be issued simultaneously. But the hypervisor
 286         * restricts one credit per window.
 287         * The hypervisor introduces 2 different types of credits:
 288         * Default credit type (Uses normal priority FIFO):
 289         *      A limited number of credits are assigned to partitions
 290         *      based on processor entitlement. But these credits may be
 291         *      over-committed on a system depends on whether the CPUs
 292         *      are in shared or dedicated modes - that is, more requests
 293         *      may be issued across the system than NX can service at
 294         *      once which can result in paste command failure (RMA_busy).
 295         *      Then the process has to resend requests or fall-back to
 296         *      SW compression.
 297         * Quality of Service (QoS) credit type (Uses high priority FIFO):
 298         *      To avoid NX HW contention, the system admins can assign
 299         *      QoS credits for each LPAR so that this partition is
 300         *      guaranteed access to NX resources. These credits are
 301         *      assigned to partitions via the HMC.
 302         *      Refer PAPR for more information.
 303         *
 304         * Allocate window with QoS credits if user requested. Otherwise
 305         * default credits are used.
 306         */
 307        if (flags & VAS_TX_WIN_FLAG_QOS_CREDIT)
 308                caps = &vascaps[VAS_GZIP_QOS_FEAT_TYPE];
 309        else
 310                caps = &vascaps[VAS_GZIP_DEF_FEAT_TYPE];
 311
 312        cop_feat_caps = &caps->caps;
 313
 314        if (atomic_inc_return(&cop_feat_caps->nr_used_credits) >
 315                        atomic_read(&cop_feat_caps->nr_total_credits)) {
 316                pr_err("Credits are not available to allocate window\n");
 317                rc = -EINVAL;
 318                goto out;
 319        }
 320
 321        if (vas_id == -1) {
 322                /*
 323                 * The user space is requesting to allocate a window on
 324                 * a VAS instance where the process is executing.
 325                 * On PowerVM, domain values are passed to the hypervisor
 326                 * to select VAS instance. Useful if the process is
 327                 * affinity to NUMA node.
 328                 * The hypervisor selects VAS instance if
 329                 * VAS_DEFAULT_DOMAIN_ID (-1) is passed for domain values.
 330                 * The h_allocate_vas_window hcall is defined to take a
 331                 * domain values as specified by h_home_node_associativity,
 332                 * So no unpacking needs to be done.
 333                 */
 334                rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain,
 335                                  VPHN_FLAG_VCPU, smp_processor_id());
 336                if (rc != H_SUCCESS) {
 337                        pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc);
 338                        goto out;
 339                }
 340        }
 341
 342        txwin->pid = mfspr(SPRN_PID);
 343
 344        /*
 345         * Allocate / Deallocate window hcalls and setup / free IRQs
 346         * have to be protected with mutex.
 347         * Open VAS window: Allocate window hcall and setup IRQ
 348         * Close VAS window: Deallocate window hcall and free IRQ
 349         *      The hypervisor waits until all NX requests are
 350         *      completed before closing the window. So expects OS
 351         *      to handle NX faults, means IRQ can be freed only
 352         *      after the deallocate window hcall is returned.
 353         * So once the window is closed with deallocate hcall before
 354         * the IRQ is freed, it can be assigned to new allocate
 355         * hcall with the same fault IRQ by the hypervisor. It can
 356         * result in setup IRQ fail for the new window since the
 357         * same fault IRQ is not freed by the OS before.
 358         */
 359        mutex_lock(&vas_pseries_mutex);
 360        if (migration_in_progress)
 361                rc = -EBUSY;
 362        else
 363                rc = allocate_setup_window(txwin, (u64 *)&domain[0],
 364                                   cop_feat_caps->win_type);
 365        mutex_unlock(&vas_pseries_mutex);
 366        if (rc)
 367                goto out;
 368
 369        /*
 370         * Modify window and it is ready to use.
 371         */
 372        rc = h_modify_vas_window(txwin);
 373        if (!rc)
 374                rc = get_vas_user_win_ref(&txwin->vas_win.task_ref);
 375        if (rc)
 376                goto out_free;
 377
 378        txwin->win_type = cop_feat_caps->win_type;
 379        mutex_lock(&vas_pseries_mutex);
 380        /*
 381         * Possible to lose the acquired credit with DLPAR core
 382         * removal after the window is opened. So if there are any
 383         * closed windows (means with lost credits), do not give new
 384         * window to user space. New windows will be opened only
 385         * after the existing windows are reopened when credits are
 386         * available.
 387         */
 388        if (!caps->nr_close_wins) {
 389                list_add(&txwin->win_list, &caps->list);
 390                caps->nr_open_windows++;
 391                mutex_unlock(&vas_pseries_mutex);
 392                vas_user_win_add_mm_context(&txwin->vas_win.task_ref);
 393                return &txwin->vas_win;
 394        }
 395        mutex_unlock(&vas_pseries_mutex);
 396
 397        put_vas_user_win_ref(&txwin->vas_win.task_ref);
 398        rc = -EBUSY;
 399        pr_err("No credit is available to allocate window\n");
 400
 401out_free:
 402        /*
 403         * Window is not operational. Free IRQ before closing
 404         * window so that do not have to hold mutex.
 405         */
 406        free_irq_setup(txwin);
 407        h_deallocate_vas_window(txwin->vas_win.winid);
 408out:
 409        atomic_dec(&cop_feat_caps->nr_used_credits);
 410        kfree(txwin);
 411        return ERR_PTR(rc);
 412}
 413
 414static u64 vas_paste_address(struct vas_window *vwin)
 415{
 416        struct pseries_vas_window *win;
 417
 418        win = container_of(vwin, struct pseries_vas_window, vas_win);
 419        return win->win_addr;
 420}
 421
 422static int deallocate_free_window(struct pseries_vas_window *win)
 423{
 424        int rc = 0;
 425
 426        /*
 427         * The hypervisor waits for all requests including faults
 428         * are processed before closing the window - Means all
 429         * credits have to be returned. In the case of fault
 430         * request, a credit is returned after OS issues
 431         * H_GET_NX_FAULT hcall.
 432         * So free IRQ after executing H_DEALLOCATE_VAS_WINDOW
 433         * hcall.
 434         */
 435        rc = h_deallocate_vas_window(win->vas_win.winid);
 436        if (!rc)
 437                free_irq_setup(win);
 438
 439        return rc;
 440}
 441
 442static int vas_deallocate_window(struct vas_window *vwin)
 443{
 444        struct pseries_vas_window *win;
 445        struct vas_cop_feat_caps *caps;
 446        int rc = 0;
 447
 448        if (!vwin)
 449                return -EINVAL;
 450
 451        win = container_of(vwin, struct pseries_vas_window, vas_win);
 452
 453        /* Should not happen */
 454        if (win->win_type >= VAS_MAX_FEAT_TYPE) {
 455                pr_err("Window (%u): Invalid window type %u\n",
 456                                vwin->winid, win->win_type);
 457                return -EINVAL;
 458        }
 459
 460        caps = &vascaps[win->win_type].caps;
 461        mutex_lock(&vas_pseries_mutex);
 462        /*
 463         * VAS window is already closed in the hypervisor when
 464         * lost the credit or with migration. So just remove the entry
 465         * from the list, remove task references and free vas_window
 466         * struct.
 467         */
 468        if (!(win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) &&
 469                !(win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) {
 470                rc = deallocate_free_window(win);
 471                if (rc) {
 472                        mutex_unlock(&vas_pseries_mutex);
 473                        return rc;
 474                }
 475        } else
 476                vascaps[win->win_type].nr_close_wins--;
 477
 478        list_del(&win->win_list);
 479        atomic_dec(&caps->nr_used_credits);
 480        vascaps[win->win_type].nr_open_windows--;
 481        mutex_unlock(&vas_pseries_mutex);
 482
 483        put_vas_user_win_ref(&vwin->task_ref);
 484        mm_context_remove_vas_window(vwin->task_ref.mm);
 485
 486        kfree(win);
 487        return 0;
 488}
 489
 490static const struct vas_user_win_ops vops_pseries = {
 491        .open_win       = vas_allocate_window,  /* Open and configure window */
 492        .paste_addr     = vas_paste_address,    /* To do copy/paste */
 493        .close_win      = vas_deallocate_window, /* Close window */
 494};
 495
 496/*
 497 * Supporting only nx-gzip coprocessor type now, but this API code
 498 * extended to other coprocessor types later.
 499 */
 500int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type,
 501                             const char *name)
 502{
 503        int rc;
 504
 505        if (!copypaste_feat)
 506                return -ENOTSUPP;
 507
 508        rc = vas_register_coproc_api(mod, cop_type, name, &vops_pseries);
 509
 510        return rc;
 511}
 512EXPORT_SYMBOL_GPL(vas_register_api_pseries);
 513
 514void vas_unregister_api_pseries(void)
 515{
 516        vas_unregister_coproc_api();
 517}
 518EXPORT_SYMBOL_GPL(vas_unregister_api_pseries);
 519
 520/*
 521 * Get the specific capabilities based on the feature type.
 522 * Right now supports GZIP default and GZIP QoS capabilities.
 523 */
 524static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type,
 525                                struct hv_vas_cop_feat_caps *hv_caps)
 526{
 527        struct vas_cop_feat_caps *caps;
 528        struct vas_caps *vcaps;
 529        int rc = 0;
 530
 531        vcaps = &vascaps[type];
 532        memset(vcaps, 0, sizeof(*vcaps));
 533        INIT_LIST_HEAD(&vcaps->list);
 534
 535        vcaps->feat = feat;
 536        caps = &vcaps->caps;
 537
 538        rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat,
 539                                          (u64)virt_to_phys(hv_caps));
 540        if (rc)
 541                return rc;
 542
 543        caps->user_mode = hv_caps->user_mode;
 544        if (!(caps->user_mode & VAS_COPY_PASTE_USER_MODE)) {
 545                pr_err("User space COPY/PASTE is not supported\n");
 546                return -ENOTSUPP;
 547        }
 548
 549        caps->descriptor = be64_to_cpu(hv_caps->descriptor);
 550        caps->win_type = hv_caps->win_type;
 551        if (caps->win_type >= VAS_MAX_FEAT_TYPE) {
 552                pr_err("Unsupported window type %u\n", caps->win_type);
 553                return -EINVAL;
 554        }
 555        caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds);
 556        caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds);
 557        atomic_set(&caps->nr_total_credits,
 558                   be16_to_cpu(hv_caps->target_lpar_creds));
 559        if (feat == VAS_GZIP_DEF_FEAT) {
 560                caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds);
 561
 562                if (caps->max_win_creds < DEF_WIN_CREDS) {
 563                        pr_err("Window creds(%u) > max allowed window creds(%u)\n",
 564                               DEF_WIN_CREDS, caps->max_win_creds);
 565                        return -EINVAL;
 566                }
 567        }
 568
 569        rc = sysfs_add_vas_caps(caps);
 570        if (rc)
 571                return rc;
 572
 573        copypaste_feat = true;
 574
 575        return 0;
 576}
 577
 578/*
 579 * VAS windows can be closed due to lost credits when the core is
 580 * removed. So reopen them if credits are available due to DLPAR
 581 * core add and set the window active status. When NX sees the page
 582 * fault on the unmapped paste address, the kernel handles the fault
 583 * by setting the remapping to new paste address if the window is
 584 * active.
 585 */
 586static int reconfig_open_windows(struct vas_caps *vcaps, int creds,
 587                                 bool migrate)
 588{
 589        long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID};
 590        struct vas_cop_feat_caps *caps = &vcaps->caps;
 591        struct pseries_vas_window *win = NULL, *tmp;
 592        int rc, mv_ents = 0;
 593        int flag;
 594
 595        /*
 596         * Nothing to do if there are no closed windows.
 597         */
 598        if (!vcaps->nr_close_wins)
 599                return 0;
 600
 601        /*
 602         * For the core removal, the hypervisor reduces the credits
 603         * assigned to the LPAR and the kernel closes VAS windows
 604         * in the hypervisor depends on reduced credits. The kernel
 605         * uses LIFO (the last windows that are opened will be closed
 606         * first) and expects to open in the same order when credits
 607         * are available.
 608         * For example, 40 windows are closed when the LPAR lost 2 cores
 609         * (dedicated). If 1 core is added, this LPAR can have 20 more
 610         * credits. It means the kernel can reopen 20 windows. So move
 611         * 20 entries in the VAS windows lost and reopen next 20 windows.
 612         * For partition migration, reopen all windows that are closed
 613         * during resume.
 614         */
 615        if ((vcaps->nr_close_wins > creds) && !migrate)
 616                mv_ents = vcaps->nr_close_wins - creds;
 617
 618        list_for_each_entry_safe(win, tmp, &vcaps->list, win_list) {
 619                if (!mv_ents)
 620                        break;
 621
 622                mv_ents--;
 623        }
 624
 625        /*
 626         * Open windows if they are closed only with migration or
 627         * DLPAR (lost credit) before.
 628         */
 629        if (migrate)
 630                flag = VAS_WIN_MIGRATE_CLOSE;
 631        else
 632                flag = VAS_WIN_NO_CRED_CLOSE;
 633
 634        list_for_each_entry_safe_from(win, tmp, &vcaps->list, win_list) {
 635                /*
 636                 * This window is closed with DLPAR and migration events.
 637                 * So reopen the window with the last event.
 638                 * The user space is not suspended with the current
 639                 * migration notifier. So the user space can issue DLPAR
 640                 * CPU hotplug while migration in progress. In this case
 641                 * this window will be opened with the last event.
 642                 */
 643                if ((win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) &&
 644                        (win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) {
 645                        win->vas_win.status &= ~flag;
 646                        continue;
 647                }
 648
 649                /*
 650                 * Nothing to do on this window if it is not closed
 651                 * with this flag
 652                 */
 653                if (!(win->vas_win.status & flag))
 654                        continue;
 655
 656                rc = allocate_setup_window(win, (u64 *)&domain[0],
 657                                           caps->win_type);
 658                if (rc)
 659                        return rc;
 660
 661                rc = h_modify_vas_window(win);
 662                if (rc)
 663                        goto out;
 664
 665                mutex_lock(&win->vas_win.task_ref.mmap_mutex);
 666                /*
 667                 * Set window status to active
 668                 */
 669                win->vas_win.status &= ~flag;
 670                mutex_unlock(&win->vas_win.task_ref.mmap_mutex);
 671                win->win_type = caps->win_type;
 672                if (!--vcaps->nr_close_wins)
 673                        break;
 674        }
 675
 676        return 0;
 677out:
 678        /*
 679         * Window modify HCALL failed. So close the window to the
 680         * hypervisor and return.
 681         */
 682        free_irq_setup(win);
 683        h_deallocate_vas_window(win->vas_win.winid);
 684        return rc;
 685}
 686
 687/*
 688 * The hypervisor reduces the available credits if the LPAR lost core. It
 689 * means the excessive windows should not be active and the user space
 690 * should not be using these windows to send compression requests to NX.
 691 * So the kernel closes the excessive windows and unmap the paste address
 692 * such that the user space receives paste instruction failure. Then up to
 693 * the user space to fall back to SW compression and manage with the
 694 * existing windows.
 695 */
 696static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds,
 697                                                                        bool migrate)
 698{
 699        struct pseries_vas_window *win, *tmp;
 700        struct vas_user_win_ref *task_ref;
 701        struct vm_area_struct *vma;
 702        int rc = 0, flag;
 703
 704        if (migrate)
 705                flag = VAS_WIN_MIGRATE_CLOSE;
 706        else
 707                flag = VAS_WIN_NO_CRED_CLOSE;
 708
 709        list_for_each_entry_safe(win, tmp, &vcap->list, win_list) {
 710                /*
 711                 * This window is already closed due to lost credit
 712                 * or for migration before. Go for next window.
 713                 * For migration, nothing to do since this window
 714                 * closed for DLPAR and will be reopened even on
 715                 * the destination system with other DLPAR operation.
 716                 */
 717                if ((win->vas_win.status & VAS_WIN_MIGRATE_CLOSE) ||
 718                        (win->vas_win.status & VAS_WIN_NO_CRED_CLOSE)) {
 719                        win->vas_win.status |= flag;
 720                        continue;
 721                }
 722
 723                task_ref = &win->vas_win.task_ref;
 724                mutex_lock(&task_ref->mmap_mutex);
 725                vma = task_ref->vma;
 726                /*
 727                 * Number of available credits are reduced, So select
 728                 * and close windows.
 729                 */
 730                win->vas_win.status |= flag;
 731
 732                mmap_write_lock(task_ref->mm);
 733                /*
 734                 * vma is set in the original mapping. But this mapping
 735                 * is done with mmap() after the window is opened with ioctl.
 736                 * so we may not see the original mapping if the core remove
 737                 * is done before the original mmap() and after the ioctl.
 738                 */
 739                if (vma)
 740                        zap_page_range(vma, vma->vm_start,
 741                                        vma->vm_end - vma->vm_start);
 742
 743                mmap_write_unlock(task_ref->mm);
 744                mutex_unlock(&task_ref->mmap_mutex);
 745                /*
 746                 * Close VAS window in the hypervisor, but do not
 747                 * free vas_window struct since it may be reused
 748                 * when the credit is available later (DLPAR with
 749                 * adding cores). This struct will be used
 750                 * later when the process issued with close(FD).
 751                 */
 752                rc = deallocate_free_window(win);
 753                /*
 754                 * This failure is from the hypervisor.
 755                 * No way to stop migration for these failures.
 756                 * So ignore error and continue closing other windows.
 757                 */
 758                if (rc && !migrate)
 759                        return rc;
 760
 761                vcap->nr_close_wins++;
 762
 763                /*
 764                 * For migration, do not depend on lpar_creds in case if
 765                 * mismatch with the hypervisor value (should not happen).
 766                 * So close all active windows in the list and will be
 767                 * reopened windows based on the new lpar_creds on the
 768                 * destination system during resume.
 769                 */
 770                if (!migrate && !--excess_creds)
 771                        break;
 772        }
 773
 774        return 0;
 775}
 776
 777/*
 778 * Get new VAS capabilities when the core add/removal configuration
 779 * changes. Reconfig window configurations based on the credits
 780 * availability from this new capabilities.
 781 */
 782int vas_reconfig_capabilties(u8 type, int new_nr_creds)
 783{
 784        struct vas_cop_feat_caps *caps;
 785        int old_nr_creds;
 786        struct vas_caps *vcaps;
 787        int rc = 0, nr_active_wins;
 788
 789        if (type >= VAS_MAX_FEAT_TYPE) {
 790                pr_err("Invalid credit type %d\n", type);
 791                return -EINVAL;
 792        }
 793
 794        vcaps = &vascaps[type];
 795        caps = &vcaps->caps;
 796
 797        mutex_lock(&vas_pseries_mutex);
 798
 799        old_nr_creds = atomic_read(&caps->nr_total_credits);
 800
 801        atomic_set(&caps->nr_total_credits, new_nr_creds);
 802        /*
 803         * The total number of available credits may be decreased or
 804         * inceased with DLPAR operation. Means some windows have to be
 805         * closed / reopened. Hold the vas_pseries_mutex so that the
 806         * the user space can not open new windows.
 807         */
 808        if (old_nr_creds <  new_nr_creds) {
 809                /*
 810                 * If the existing target credits is less than the new
 811                 * target, reopen windows if they are closed due to
 812                 * the previous DLPAR (core removal).
 813                 */
 814                rc = reconfig_open_windows(vcaps, new_nr_creds - old_nr_creds,
 815                                           false);
 816        } else {
 817                /*
 818                 * # active windows is more than new LPAR available
 819                 * credits. So close the excessive windows.
 820                 * On pseries, each window will have 1 credit.
 821                 */
 822                nr_active_wins = vcaps->nr_open_windows - vcaps->nr_close_wins;
 823                if (nr_active_wins > new_nr_creds)
 824                        rc = reconfig_close_windows(vcaps,
 825                                        nr_active_wins - new_nr_creds,
 826                                        false);
 827        }
 828
 829        mutex_unlock(&vas_pseries_mutex);
 830        return rc;
 831}
 832/*
 833 * Total number of default credits available (target_credits)
 834 * in LPAR depends on number of cores configured. It varies based on
 835 * whether processors are in shared mode or dedicated mode.
 836 * Get the notifier when CPU configuration is changed with DLPAR
 837 * operation so that get the new target_credits (vas default capabilities)
 838 * and then update the existing windows usage if needed.
 839 */
 840static int pseries_vas_notifier(struct notifier_block *nb,
 841                                unsigned long action, void *data)
 842{
 843        struct of_reconfig_data *rd = data;
 844        struct device_node *dn = rd->dn;
 845        const __be32 *intserv = NULL;
 846        int new_nr_creds, len, rc = 0;
 847
 848        if ((action == OF_RECONFIG_ATTACH_NODE) ||
 849                (action == OF_RECONFIG_DETACH_NODE))
 850                intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s",
 851                                          &len);
 852        /*
 853         * Processor config is not changed
 854         */
 855        if (!intserv)
 856                return NOTIFY_OK;
 857
 858        rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
 859                                        vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat,
 860                                        (u64)virt_to_phys(&hv_cop_caps));
 861        if (!rc) {
 862                new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
 863                rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE,
 864                                                new_nr_creds);
 865        }
 866
 867        if (rc)
 868                pr_err("Failed reconfig VAS capabilities with DLPAR\n");
 869
 870        return rc;
 871}
 872
 873static struct notifier_block pseries_vas_nb = {
 874        .notifier_call = pseries_vas_notifier,
 875};
 876
 877/*
 878 * For LPM, all windows have to be closed on the source partition
 879 * before migration and reopen them on the destination partition
 880 * after migration. So closing windows during suspend and
 881 * reopen them during resume.
 882 */
 883int vas_migration_handler(int action)
 884{
 885        struct vas_cop_feat_caps *caps;
 886        int old_nr_creds, new_nr_creds = 0;
 887        struct vas_caps *vcaps;
 888        int i, rc = 0;
 889
 890        /*
 891         * NX-GZIP is not enabled. Nothing to do for migration.
 892         */
 893        if (!copypaste_feat)
 894                return rc;
 895
 896        mutex_lock(&vas_pseries_mutex);
 897
 898        if (action == VAS_SUSPEND)
 899                migration_in_progress = true;
 900        else
 901                migration_in_progress = false;
 902
 903        for (i = 0; i < VAS_MAX_FEAT_TYPE; i++) {
 904                vcaps = &vascaps[i];
 905                caps = &vcaps->caps;
 906                old_nr_creds = atomic_read(&caps->nr_total_credits);
 907
 908                rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
 909                                              vcaps->feat,
 910                                              (u64)virt_to_phys(&hv_cop_caps));
 911                if (!rc) {
 912                        new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
 913                        /*
 914                         * Should not happen. But incase print messages, close
 915                         * all windows in the list during suspend and reopen
 916                         * windows based on new lpar_creds on the destination
 917                         * system.
 918                         */
 919                        if (old_nr_creds != new_nr_creds) {
 920                                pr_err("Target credits mismatch with the hypervisor\n");
 921                                pr_err("state(%d): lpar creds: %d HV lpar creds: %d\n",
 922                                        action, old_nr_creds, new_nr_creds);
 923                                pr_err("Used creds: %d, Active creds: %d\n",
 924                                        atomic_read(&caps->nr_used_credits),
 925                                        vcaps->nr_open_windows - vcaps->nr_close_wins);
 926                        }
 927                } else {
 928                        pr_err("state(%d): Get VAS capabilities failed with %d\n",
 929                                action, rc);
 930                        /*
 931                         * We can not stop migration with the current lpm
 932                         * implementation. So continue closing all windows in
 933                         * the list (during suspend) and return without
 934                         * opening windows (during resume) if VAS capabilities
 935                         * HCALL failed.
 936                         */
 937                        if (action == VAS_RESUME)
 938                                goto out;
 939                }
 940
 941                switch (action) {
 942                case VAS_SUSPEND:
 943                        rc = reconfig_close_windows(vcaps, vcaps->nr_open_windows,
 944                                                        true);
 945                        break;
 946                case VAS_RESUME:
 947                        atomic_set(&caps->nr_total_credits, new_nr_creds);
 948                        rc = reconfig_open_windows(vcaps, new_nr_creds, true);
 949                        break;
 950                default:
 951                        /* should not happen */
 952                        pr_err("Invalid migration action %d\n", action);
 953                        rc = -EINVAL;
 954                        goto out;
 955                }
 956
 957                /*
 958                 * Ignore errors during suspend and return for resume.
 959                 */
 960                if (rc && (action == VAS_RESUME))
 961                        goto out;
 962        }
 963
 964out:
 965        mutex_unlock(&vas_pseries_mutex);
 966        return rc;
 967}
 968
 969static int __init pseries_vas_init(void)
 970{
 971        struct hv_vas_all_caps *hv_caps;
 972        int rc = 0;
 973
 974        /*
 975         * Linux supports user space COPY/PASTE only with Radix
 976         */
 977        if (!radix_enabled()) {
 978                pr_err("API is supported only with radix page tables\n");
 979                return -ENOTSUPP;
 980        }
 981
 982        hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL);
 983        if (!hv_caps)
 984                return -ENOMEM;
 985        /*
 986         * Get VAS overall capabilities by passing 0 to feature type.
 987         */
 988        rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 0,
 989                                          (u64)virt_to_phys(hv_caps));
 990        if (rc)
 991                goto out;
 992
 993        caps_all.descriptor = be64_to_cpu(hv_caps->descriptor);
 994        caps_all.feat_type = be64_to_cpu(hv_caps->feat_type);
 995
 996        sysfs_pseries_vas_init(&caps_all);
 997
 998        /*
 999         * QOS capabilities available
1000         */
1001        if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) {
1002                rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT,
1003                                          VAS_GZIP_QOS_FEAT_TYPE, &hv_cop_caps);
1004
1005                if (rc)
1006                        goto out;
1007        }
1008        /*
1009         * Default capabilities available
1010         */
1011        if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT)
1012                rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT,
1013                                          VAS_GZIP_DEF_FEAT_TYPE, &hv_cop_caps);
1014
1015        if (!rc && copypaste_feat) {
1016                if (firmware_has_feature(FW_FEATURE_LPAR))
1017                        of_reconfig_notifier_register(&pseries_vas_nb);
1018
1019                pr_info("GZIP feature is available\n");
1020        } else {
1021                /*
1022                 * Should not happen, but only when get default
1023                 * capabilities HCALL failed. So disable copy paste
1024                 * feature.
1025                 */
1026                copypaste_feat = false;
1027        }
1028
1029out:
1030        kfree(hv_caps);
1031        return rc;
1032}
1033machine_device_initcall(pseries, pseries_vas_init);
1034