linux/arch/powerpc/kvm/book3s_xive_native.c
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (c) 2017-2019, IBM Corporation.
   4 */
   5
   6#define pr_fmt(fmt) "xive-kvm: " fmt
   7
   8#include <linux/kernel.h>
   9#include <linux/kvm_host.h>
  10#include <linux/err.h>
  11#include <linux/gfp.h>
  12#include <linux/spinlock.h>
  13#include <linux/delay.h>
  14#include <linux/file.h>
   15#include <linux/uaccess.h>
  16#include <asm/kvm_book3s.h>
  17#include <asm/kvm_ppc.h>
  18#include <asm/hvcall.h>
  19#include <asm/xive.h>
  20#include <asm/xive-regs.h>
  21#include <asm/debug.h>
  22#include <asm/debugfs.h>
  23#include <asm/opal.h>
  24
  25#include <linux/debugfs.h>
  26#include <linux/seq_file.h>
  27
  28#include "book3s_xive.h"
  29
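     /*
      * ESB MMIO load helper: performs a load at the given offset of
      * the source ESB page and returns the low byte of the result,
      * which carries the PQ state of the source.
      */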
  30static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset)
  31{
  32        u64 val;
  33
  34        /*
  35         * The KVM XIVE native device does not use the XIVE_ESB_SET_PQ_10
  36         * load operation, so there is no need to enforce load-after-store
  37         * ordering.
  38         */
  39
  40        val = in_be64(xd->eoi_mmio + offset);
  41        return (u8)val;
  42}
  43
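     /*
      * Disable the EQ of a vcpu for the given priority and release
      * the reference taken on the guest queue page when the queue
      * was configured.
      */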
  44static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio)
  45{
  46        struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
  47        struct xive_q *q = &xc->queues[prio];
  48
  49        xive_native_disable_queue(xc->vp_id, q, prio);
  50        if (q->qpage) {
  51                put_page(virt_to_page(q->qpage));
  52                q->qpage = NULL;
  53        }
  54}
  55
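     /*
      * Wrapper around xive_native_configure_queue() which, on
      * success, drops the reference on the previous queue page, if
      * any, so that reconfiguring a queue does not leak the old page.
      */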
  56static int kvmppc_xive_native_configure_queue(u32 vp_id, struct xive_q *q,
  57                                              u8 prio, __be32 *qpage,
  58                                              u32 order, bool can_escalate)
  59{
  60        int rc;
  61        __be32 *qpage_prev = q->qpage;
  62
  63        rc = xive_native_configure_queue(vp_id, q, prio, qpage, order,
  64                                         can_escalate);
  65        if (rc)
  66                return rc;
  67
  68        if (qpage_prev)
  69                put_page(virt_to_page(qpage_prev));
  70
  71        return rc;
  72}
  73
  74void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
  75{
  76        struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
  77        int i;
  78
  79        if (!kvmppc_xive_enabled(vcpu))
  80                return;
  81
  82        if (!xc)
  83                return;
  84
  85        pr_devel("native_cleanup_vcpu(cpu=%d)\n", xc->server_num);
  86
  87        /* Ensure no interrupt is still routed to that VP */
  88        xc->valid = false;
  89        kvmppc_xive_disable_vcpu_interrupts(vcpu);
  90
  91        /* Free escalations */
  92        for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
  93                /* Free the escalation irq */
  94                if (xc->esc_virq[i]) {
  95                        if (xc->xive->single_escalation)
  96                                xive_cleanup_single_escalation(vcpu, xc,
  97                                                        xc->esc_virq[i]);
  98                        free_irq(xc->esc_virq[i], vcpu);
  99                        irq_dispose_mapping(xc->esc_virq[i]);
 100                        kfree(xc->esc_virq_names[i]);
 101                        xc->esc_virq[i] = 0;
 102                }
 103        }
 104
 105        /* Disable the VP */
 106        xive_native_disable_vp(xc->vp_id);
 107
 108        /* Clear the cam word so guest entry won't try to push context */
 109        vcpu->arch.xive_cam_word = 0;
 110
 111        /* Free the queues */
 112        for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
 113                kvmppc_xive_native_cleanup_queue(vcpu, i);
 114        }
 115
 116        /* Free the VP */
 117        kfree(xc);
 118
 119        /* Cleanup the vcpu */
 120        vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
 121        vcpu->arch.xive_vcpu = NULL;
 122}
 123
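     /*
      * Connect a vcpu to the XIVE native device: allocate the
      * per-vcpu presenter state, enable the underlying XIVE VP in
      * OPAL and set up the fields used by the assembly push/pull
      * code on guest entry/exit.
      */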
 124int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
 125                                    struct kvm_vcpu *vcpu, u32 server_num)
 126{
 127        struct kvmppc_xive *xive = dev->private;
 128        struct kvmppc_xive_vcpu *xc = NULL;
 129        int rc;
 130        u32 vp_id;
 131
 132        pr_devel("native_connect_vcpu(server=%d)\n", server_num);
 133
 134        if (dev->ops != &kvm_xive_native_ops) {
 135                pr_devel("Wrong ops !\n");
 136                return -EPERM;
 137        }
 138        if (xive->kvm != vcpu->kvm)
 139                return -EPERM;
 140        if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
 141                return -EBUSY;
 142
 143        mutex_lock(&xive->lock);
 144
 145        rc = kvmppc_xive_compute_vp_id(xive, server_num, &vp_id);
 146        if (rc)
 147                goto bail;
 148
 149        xc = kzalloc(sizeof(*xc), GFP_KERNEL);
 150        if (!xc) {
 151                rc = -ENOMEM;
 152                goto bail;
 153        }
 154
 155        vcpu->arch.xive_vcpu = xc;
 156        xc->xive = xive;
 157        xc->vcpu = vcpu;
 158        xc->server_num = server_num;
 159
 160        xc->vp_id = vp_id;
 161        xc->valid = true;
 162        vcpu->arch.irq_type = KVMPPC_IRQ_XIVE;
 163
 164        rc = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id);
 165        if (rc) {
 166                pr_err("Failed to get VP info from OPAL: %d\n", rc);
 167                goto bail;
 168        }
 169
 170        /*
 171         * Enable the VP first as the single escalation mode will
  172         * affect the escalation interrupt numbering
 173         */
 174        rc = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
 175        if (rc) {
 176                pr_err("Failed to enable VP in OPAL: %d\n", rc);
 177                goto bail;
 178        }
 179
 180        /* Configure VCPU fields for use by assembly push/pull */
 181        vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000);
 182        vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO);
 183
 184        /* TODO: reset all queues to a clean state ? */
 185bail:
 186        mutex_unlock(&xive->lock);
 187        if (rc)
 188                kvmppc_xive_native_cleanup_vcpu(vcpu);
 189
 190        return rc;
 191}
 192
 193/*
 194 * Device passthrough support
 195 */
 196static int kvmppc_xive_native_reset_mapped(struct kvm *kvm, unsigned long irq)
 197{
 198        struct kvmppc_xive *xive = kvm->arch.xive;
 199        pgoff_t esb_pgoff = KVM_XIVE_ESB_PAGE_OFFSET + irq * 2;
 200
 201        if (irq >= KVMPPC_XIVE_NR_IRQS)
 202                return -EINVAL;
 203
 204        /*
 205         * Clear the ESB pages of the IRQ number being mapped (or
  206         * unmapped) into the guest and let the VM fault handler
 207         * repopulate with the appropriate ESB pages (device or IC)
 208         */
 209        pr_debug("clearing esb pages for girq 0x%lx\n", irq);
 210        mutex_lock(&xive->mapping_lock);
 211        if (xive->mapping)
 212                unmap_mapping_range(xive->mapping,
 213                                    esb_pgoff << PAGE_SHIFT,
 214                                    2ull << PAGE_SHIFT, 1);
 215        mutex_unlock(&xive->mapping_lock);
 216        return 0;
 217}
 218
 219static struct kvmppc_xive_ops kvmppc_xive_native_ops =  {
 220        .reset_mapped = kvmppc_xive_native_reset_mapped,
 221};
 222
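     /*
      * Fault handler for the ESB region of the device mapping. Each
      * guest IRQ uses two pages: the even page maps the trigger ESB
      * page of the source and the odd page maps its EOI/management
      * ESB page.
      */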
 223static vm_fault_t xive_native_esb_fault(struct vm_fault *vmf)
 224{
 225        struct vm_area_struct *vma = vmf->vma;
 226        struct kvm_device *dev = vma->vm_file->private_data;
 227        struct kvmppc_xive *xive = dev->private;
 228        struct kvmppc_xive_src_block *sb;
 229        struct kvmppc_xive_irq_state *state;
 230        struct xive_irq_data *xd;
 231        u32 hw_num;
 232        u16 src;
 233        u64 page;
 234        unsigned long irq;
 235        u64 page_offset;
 236
 237        /*
 238         * Linux/KVM uses a two pages ESB setting, one for trigger and
 239         * one for EOI
 240         */
 241        page_offset = vmf->pgoff - vma->vm_pgoff;
 242        irq = page_offset / 2;
 243
 244        sb = kvmppc_xive_find_source(xive, irq, &src);
 245        if (!sb) {
 246                pr_devel("%s: source %lx not found !\n", __func__, irq);
 247                return VM_FAULT_SIGBUS;
 248        }
 249
 250        state = &sb->irq_state[src];
 251
 252        /* Some sanity checking */
 253        if (!state->valid) {
 254                pr_devel("%s: source %lx invalid !\n", __func__, irq);
 255                return VM_FAULT_SIGBUS;
 256        }
 257
 258        kvmppc_xive_select_irq(state, &hw_num, &xd);
 259
 260        arch_spin_lock(&sb->lock);
 261
 262        /*
 263         * first/even page is for trigger
 264         * second/odd page is for EOI and management.
 265         */
 266        page = page_offset % 2 ? xd->eoi_page : xd->trig_page;
 267        arch_spin_unlock(&sb->lock);
 268
 269        if (WARN_ON(!page)) {
 270                pr_err("%s: accessing invalid ESB page for source %lx !\n",
 271                       __func__, irq);
 272                return VM_FAULT_SIGBUS;
 273        }
 274
 275        vmf_insert_pfn(vma, vmf->address, page >> PAGE_SHIFT);
 276        return VM_FAULT_NOPAGE;
 277}
 278
 279static const struct vm_operations_struct xive_native_esb_vmops = {
 280        .fault = xive_native_esb_fault,
 281};
 282
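     /*
      * Fault handler for the TIMA region. Only the OS page (third
      * page) of the thread interrupt management area is exposed; the
      * HW, HV and USER pages are not accessible through this mapping.
      */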
 283static vm_fault_t xive_native_tima_fault(struct vm_fault *vmf)
 284{
 285        struct vm_area_struct *vma = vmf->vma;
 286
 287        switch (vmf->pgoff - vma->vm_pgoff) {
 288        case 0: /* HW - forbid access */
 289        case 1: /* HV - forbid access */
 290                return VM_FAULT_SIGBUS;
 291        case 2: /* OS */
 292                vmf_insert_pfn(vma, vmf->address, xive_tima_os >> PAGE_SHIFT);
 293                return VM_FAULT_NOPAGE;
 294        case 3: /* USER - TODO */
 295        default:
 296                return VM_FAULT_SIGBUS;
 297        }
 298}
 299
 300static const struct vm_operations_struct xive_native_tima_vmops = {
 301        .fault = xive_native_tima_fault,
 302};
 303
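     /*
      * mmap handler of the device fd. Two fixed regions are
      * supported: the TIMA pages at KVM_XIVE_TIMA_PAGE_OFFSET (at
      * most 4 pages) and the source ESB pages at
      * KVM_XIVE_ESB_PAGE_OFFSET (2 pages per interrupt). Pages are
      * populated on demand by the fault handlers above.
      */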
 304static int kvmppc_xive_native_mmap(struct kvm_device *dev,
 305                                   struct vm_area_struct *vma)
 306{
 307        struct kvmppc_xive *xive = dev->private;
 308
 309        /* We only allow mappings at fixed offset for now */
 310        if (vma->vm_pgoff == KVM_XIVE_TIMA_PAGE_OFFSET) {
 311                if (vma_pages(vma) > 4)
 312                        return -EINVAL;
 313                vma->vm_ops = &xive_native_tima_vmops;
 314        } else if (vma->vm_pgoff == KVM_XIVE_ESB_PAGE_OFFSET) {
 315                if (vma_pages(vma) > KVMPPC_XIVE_NR_IRQS * 2)
 316                        return -EINVAL;
 317                vma->vm_ops = &xive_native_esb_vmops;
 318        } else {
 319                return -EINVAL;
 320        }
 321
 322        vma->vm_flags |= VM_IO | VM_PFNMAP;
 323        vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);
 324
 325        /*
 326         * Grab the KVM device file address_space to be able to clear
 327         * the ESB pages mapping when a device is passed-through into
 328         * the guest.
 329         */
 330        xive->mapping = vma->vm_file->f_mapping;
 331        return 0;
 332}
 333
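     /*
      * KVM_DEV_XIVE_GRP_SOURCE: create or update a guest interrupt
      * source. The source block is allocated on demand, an IPI is
      * allocated from the XIVE IC if the source does not have one
      * yet, the LSI state is restored from the attribute value and
      * the source starts masked.
      */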
 334static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq,
 335                                         u64 addr)
 336{
 337        struct kvmppc_xive_src_block *sb;
 338        struct kvmppc_xive_irq_state *state;
 339        u64 __user *ubufp = (u64 __user *) addr;
 340        u64 val;
 341        u16 idx;
 342        int rc;
 343
 344        pr_devel("%s irq=0x%lx\n", __func__, irq);
 345
 346        if (irq < KVMPPC_XIVE_FIRST_IRQ || irq >= KVMPPC_XIVE_NR_IRQS)
 347                return -E2BIG;
 348
 349        sb = kvmppc_xive_find_source(xive, irq, &idx);
 350        if (!sb) {
 351                pr_debug("No source, creating source block...\n");
 352                sb = kvmppc_xive_create_src_block(xive, irq);
 353                if (!sb) {
 354                        pr_err("Failed to create block...\n");
 355                        return -ENOMEM;
 356                }
 357        }
 358        state = &sb->irq_state[idx];
 359
 360        if (get_user(val, ubufp)) {
 361                pr_err("fault getting user info !\n");
 362                return -EFAULT;
 363        }
 364
 365        arch_spin_lock(&sb->lock);
 366
 367        /*
 368         * If the source doesn't already have an IPI, allocate
 369         * one and get the corresponding data
 370         */
 371        if (!state->ipi_number) {
 372                state->ipi_number = xive_native_alloc_irq();
 373                if (state->ipi_number == 0) {
 374                        pr_err("Failed to allocate IRQ !\n");
 375                        rc = -ENXIO;
 376                        goto unlock;
 377                }
 378                xive_native_populate_irq_data(state->ipi_number,
 379                                              &state->ipi_data);
 380                pr_debug("%s allocated hw_irq=0x%x for irq=0x%lx\n", __func__,
 381                         state->ipi_number, irq);
 382        }
 383
 384        /* Restore LSI state */
 385        if (val & KVM_XIVE_LEVEL_SENSITIVE) {
 386                state->lsi = true;
 387                if (val & KVM_XIVE_LEVEL_ASSERTED)
 388                        state->asserted = true;
 389                pr_devel("  LSI ! Asserted=%d\n", state->asserted);
 390        }
 391
 392        /* Mask IRQ to start with */
 393        state->act_server = 0;
 394        state->act_priority = MASKED;
 395        xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
 396        xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
 397
 398        /* Increment the number of valid sources and mark this one valid */
 399        if (!state->valid)
 400                xive->src_count++;
 401        state->valid = true;
 402
 403        rc = 0;
 404
 405unlock:
 406        arch_spin_unlock(&sb->lock);
 407
 408        return rc;
 409}
 410
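     /*
      * Route a source to a (server, priority) target with the EISN
      * the guest expects in its event queue, or mask it by
      * configuring the HW interrupt with a MASKED priority.
      */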
 411static int kvmppc_xive_native_update_source_config(struct kvmppc_xive *xive,
 412                                        struct kvmppc_xive_src_block *sb,
 413                                        struct kvmppc_xive_irq_state *state,
 414                                        u32 server, u8 priority, bool masked,
 415                                        u32 eisn)
 416{
 417        struct kvm *kvm = xive->kvm;
 418        u32 hw_num;
 419        int rc = 0;
 420
 421        arch_spin_lock(&sb->lock);
 422
 423        if (state->act_server == server && state->act_priority == priority &&
 424            state->eisn == eisn)
 425                goto unlock;
 426
 427        pr_devel("new_act_prio=%d new_act_server=%d mask=%d act_server=%d act_prio=%d\n",
 428                 priority, server, masked, state->act_server,
 429                 state->act_priority);
 430
 431        kvmppc_xive_select_irq(state, &hw_num, NULL);
 432
 433        if (priority != MASKED && !masked) {
 434                rc = kvmppc_xive_select_target(kvm, &server, priority);
 435                if (rc)
 436                        goto unlock;
 437
 438                state->act_priority = priority;
 439                state->act_server = server;
 440                state->eisn = eisn;
 441
 442                rc = xive_native_configure_irq(hw_num,
 443                                               kvmppc_xive_vp(xive, server),
 444                                               priority, eisn);
 445        } else {
 446                state->act_priority = MASKED;
 447                state->act_server = 0;
 448                state->eisn = 0;
 449
 450                rc = xive_native_configure_irq(hw_num, 0, MASKED, 0);
 451        }
 452
 453unlock:
 454        arch_spin_unlock(&sb->lock);
 455        return rc;
 456}
 457
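     /*
      * KVM_DEV_XIVE_GRP_SOURCE_CONFIG: decode the target server,
      * priority, masked flag and EISN from the attribute value and
      * update the EAS configuration of the source accordingly.
      */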
 458static int kvmppc_xive_native_set_source_config(struct kvmppc_xive *xive,
 459                                                long irq, u64 addr)
 460{
 461        struct kvmppc_xive_src_block *sb;
 462        struct kvmppc_xive_irq_state *state;
 463        u64 __user *ubufp = (u64 __user *) addr;
 464        u16 src;
 465        u64 kvm_cfg;
 466        u32 server;
 467        u8 priority;
 468        bool masked;
 469        u32 eisn;
 470
 471        sb = kvmppc_xive_find_source(xive, irq, &src);
 472        if (!sb)
 473                return -ENOENT;
 474
 475        state = &sb->irq_state[src];
 476
 477        if (!state->valid)
 478                return -EINVAL;
 479
 480        if (get_user(kvm_cfg, ubufp))
 481                return -EFAULT;
 482
 483        pr_devel("%s irq=0x%lx cfg=%016llx\n", __func__, irq, kvm_cfg);
 484
 485        priority = (kvm_cfg & KVM_XIVE_SOURCE_PRIORITY_MASK) >>
 486                KVM_XIVE_SOURCE_PRIORITY_SHIFT;
 487        server = (kvm_cfg & KVM_XIVE_SOURCE_SERVER_MASK) >>
 488                KVM_XIVE_SOURCE_SERVER_SHIFT;
 489        masked = (kvm_cfg & KVM_XIVE_SOURCE_MASKED_MASK) >>
 490                KVM_XIVE_SOURCE_MASKED_SHIFT;
 491        eisn = (kvm_cfg & KVM_XIVE_SOURCE_EISN_MASK) >>
 492                KVM_XIVE_SOURCE_EISN_SHIFT;
 493
 494        if (priority != xive_prio_from_guest(priority)) {
 495                pr_err("invalid priority for queue %d for VCPU %d\n",
 496                       priority, server);
 497                return -EINVAL;
 498        }
 499
 500        return kvmppc_xive_native_update_source_config(xive, sb, state, server,
 501                                                       priority, masked, eisn);
 502}
 503
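     /*
      * KVM_DEV_XIVE_GRP_SOURCE_SYNC: have the XIVE IC perform a sync
      * of the HW source backing the guest IRQ.
      */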
 504static int kvmppc_xive_native_sync_source(struct kvmppc_xive *xive,
 505                                          long irq, u64 addr)
 506{
 507        struct kvmppc_xive_src_block *sb;
 508        struct kvmppc_xive_irq_state *state;
 509        struct xive_irq_data *xd;
 510        u32 hw_num;
 511        u16 src;
 512        int rc = 0;
 513
  514        pr_devel("%s irq=0x%lx\n", __func__, irq);
 515
 516        sb = kvmppc_xive_find_source(xive, irq, &src);
 517        if (!sb)
 518                return -ENOENT;
 519
 520        state = &sb->irq_state[src];
 521
 522        rc = -EINVAL;
 523
 524        arch_spin_lock(&sb->lock);
 525
 526        if (state->valid) {
 527                kvmppc_xive_select_irq(state, &hw_num, &xd);
 528                xive_native_sync_source(hw_num);
 529                rc = 0;
 530        }
 531
 532        arch_spin_unlock(&sb->lock);
 533        return rc;
 534}
 535
 536static int xive_native_validate_queue_size(u32 qshift)
 537{
 538        /*
 539         * We only support 64K pages for the moment. This is also
 540         * advertised in the DT property "ibm,xive-eq-sizes"
 541         */
 542        switch (qshift) {
 543        case 0: /* EQ reset */
 544        case 16:
 545                return 0;
 546        case 12:
 547        case 21:
 548        case 24:
 549        default:
 550                return -EINVAL;
 551        }
 552}
 553
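     /*
      * KVM_DEV_XIVE_GRP_EQ_CONFIG (set): configure or reset the event
      * queue of a (server, priority) pair. The guest queue page is
      * translated and handed to OPAL, the queue state (toggle, index)
      * is only restored when migrating, and an escalation interrupt
      * is attached to the queue.
      */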
 554static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive,
 555                                               long eq_idx, u64 addr)
 556{
 557        struct kvm *kvm = xive->kvm;
 558        struct kvm_vcpu *vcpu;
 559        struct kvmppc_xive_vcpu *xc;
 560        void __user *ubufp = (void __user *) addr;
 561        u32 server;
 562        u8 priority;
 563        struct kvm_ppc_xive_eq kvm_eq;
 564        int rc;
  565        __be32 *qaddr = NULL;
 566        struct page *page;
 567        struct xive_q *q;
 568        gfn_t gfn;
 569        unsigned long page_size;
 570        int srcu_idx;
 571
 572        /*
 573         * Demangle priority/server tuple from the EQ identifier
 574         */
 575        priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
 576                KVM_XIVE_EQ_PRIORITY_SHIFT;
 577        server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
 578                KVM_XIVE_EQ_SERVER_SHIFT;
 579
 580        if (copy_from_user(&kvm_eq, ubufp, sizeof(kvm_eq)))
 581                return -EFAULT;
 582
 583        vcpu = kvmppc_xive_find_server(kvm, server);
 584        if (!vcpu) {
 585                pr_err("Can't find server %d\n", server);
 586                return -ENOENT;
 587        }
 588        xc = vcpu->arch.xive_vcpu;
 589
 590        if (priority != xive_prio_from_guest(priority)) {
 591                pr_err("Trying to restore invalid queue %d for VCPU %d\n",
 592                       priority, server);
 593                return -EINVAL;
 594        }
 595        q = &xc->queues[priority];
 596
 597        pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
 598                 __func__, server, priority, kvm_eq.flags,
 599                 kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);
 600
 601        /* reset queue and disable queueing */
 602        if (!kvm_eq.qshift) {
 603                q->guest_qaddr  = 0;
 604                q->guest_qshift = 0;
 605
 606                rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority,
 607                                                        NULL, 0, true);
 608                if (rc) {
 609                        pr_err("Failed to reset queue %d for VCPU %d: %d\n",
 610                               priority, xc->server_num, rc);
 611                        return rc;
 612                }
 613
 614                return 0;
 615        }
 616
 617        /*
  618         * sPAPR specifies an "Unconditional Notify (n) flag" for the
 619         * H_INT_SET_QUEUE_CONFIG hcall which forces notification
 620         * without using the coalescing mechanisms provided by the
 621         * XIVE END ESBs. This is required on KVM as notification
 622         * using the END ESBs is not supported.
 623         */
 624        if (kvm_eq.flags != KVM_XIVE_EQ_ALWAYS_NOTIFY) {
 625                pr_err("invalid flags %d\n", kvm_eq.flags);
 626                return -EINVAL;
 627        }
 628
 629        rc = xive_native_validate_queue_size(kvm_eq.qshift);
 630        if (rc) {
 631                pr_err("invalid queue size %d\n", kvm_eq.qshift);
 632                return rc;
 633        }
 634
 635        if (kvm_eq.qaddr & ((1ull << kvm_eq.qshift) - 1)) {
 636                pr_err("queue page is not aligned %llx/%llx\n", kvm_eq.qaddr,
 637                       1ull << kvm_eq.qshift);
 638                return -EINVAL;
 639        }
 640
 641        srcu_idx = srcu_read_lock(&kvm->srcu);
 642        gfn = gpa_to_gfn(kvm_eq.qaddr);
 643
 644        page_size = kvm_host_page_size(vcpu, gfn);
 645        if (1ull << kvm_eq.qshift > page_size) {
 646                srcu_read_unlock(&kvm->srcu, srcu_idx);
 647                pr_warn("Incompatible host page size %lx!\n", page_size);
 648                return -EINVAL;
 649        }
 650
 651        page = gfn_to_page(kvm, gfn);
 652        if (is_error_page(page)) {
 653                srcu_read_unlock(&kvm->srcu, srcu_idx);
 654                pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr);
 655                return -EINVAL;
 656        }
 657
 658        qaddr = page_to_virt(page) + (kvm_eq.qaddr & ~PAGE_MASK);
 659        srcu_read_unlock(&kvm->srcu, srcu_idx);
 660
 661        /*
  662         * Back up the queue page guest address so we can mark the EQ
  663         * page dirty for migration.
 664         */
 665        q->guest_qaddr  = kvm_eq.qaddr;
 666        q->guest_qshift = kvm_eq.qshift;
 667
  668        /*
  669         * Unconditional Notification is forced by default at the
  670         * OPAL level because the use of END ESBs is not supported by
  671         * Linux.
  672         */
 673        rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority,
 674                                        (__be32 *) qaddr, kvm_eq.qshift, true);
 675        if (rc) {
 676                pr_err("Failed to configure queue %d for VCPU %d: %d\n",
 677                       priority, xc->server_num, rc);
 678                put_page(page);
 679                return rc;
 680        }
 681
 682        /*
 683         * Only restore the queue state when needed. When doing the
  684         * H_INT_SET_QUEUE_CONFIG hcall, it should not.
 685         */
 686        if (kvm_eq.qtoggle != 1 || kvm_eq.qindex != 0) {
 687                rc = xive_native_set_queue_state(xc->vp_id, priority,
 688                                                 kvm_eq.qtoggle,
 689                                                 kvm_eq.qindex);
 690                if (rc)
 691                        goto error;
 692        }
 693
 694        rc = kvmppc_xive_attach_escalation(vcpu, priority,
 695                                           xive->single_escalation);
 696error:
 697        if (rc)
 698                kvmppc_xive_native_cleanup_queue(vcpu, priority);
 699        return rc;
 700}
 701
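     /*
      * KVM_DEV_XIVE_GRP_EQ_CONFIG (get): return the event queue
      * configuration of a (server, priority) pair, including the
      * current toggle bit and index, primarily for migration.
      */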
 702static int kvmppc_xive_native_get_queue_config(struct kvmppc_xive *xive,
 703                                               long eq_idx, u64 addr)
 704{
 705        struct kvm *kvm = xive->kvm;
 706        struct kvm_vcpu *vcpu;
 707        struct kvmppc_xive_vcpu *xc;
 708        struct xive_q *q;
  709        void __user *ubufp = (void __user *) addr;
 710        u32 server;
 711        u8 priority;
 712        struct kvm_ppc_xive_eq kvm_eq;
 713        u64 qaddr;
 714        u64 qshift;
 715        u64 qeoi_page;
 716        u32 escalate_irq;
 717        u64 qflags;
 718        int rc;
 719
 720        /*
 721         * Demangle priority/server tuple from the EQ identifier
 722         */
 723        priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
 724                KVM_XIVE_EQ_PRIORITY_SHIFT;
 725        server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
 726                KVM_XIVE_EQ_SERVER_SHIFT;
 727
 728        vcpu = kvmppc_xive_find_server(kvm, server);
 729        if (!vcpu) {
 730                pr_err("Can't find server %d\n", server);
 731                return -ENOENT;
 732        }
 733        xc = vcpu->arch.xive_vcpu;
 734
 735        if (priority != xive_prio_from_guest(priority)) {
 736                pr_err("invalid priority for queue %d for VCPU %d\n",
 737                       priority, server);
 738                return -EINVAL;
 739        }
 740        q = &xc->queues[priority];
 741
 742        memset(&kvm_eq, 0, sizeof(kvm_eq));
 743
 744        if (!q->qpage)
 745                return 0;
 746
 747        rc = xive_native_get_queue_info(xc->vp_id, priority, &qaddr, &qshift,
 748                                        &qeoi_page, &escalate_irq, &qflags);
 749        if (rc)
 750                return rc;
 751
 752        kvm_eq.flags = 0;
 753        if (qflags & OPAL_XIVE_EQ_ALWAYS_NOTIFY)
 754                kvm_eq.flags |= KVM_XIVE_EQ_ALWAYS_NOTIFY;
 755
 756        kvm_eq.qshift = q->guest_qshift;
 757        kvm_eq.qaddr  = q->guest_qaddr;
 758
 759        rc = xive_native_get_queue_state(xc->vp_id, priority, &kvm_eq.qtoggle,
 760                                         &kvm_eq.qindex);
 761        if (rc)
 762                return rc;
 763
 764        pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
 765                 __func__, server, priority, kvm_eq.flags,
 766                 kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);
 767
 768        if (copy_to_user(ubufp, &kvm_eq, sizeof(kvm_eq)))
 769                return -EFAULT;
 770
 771        return 0;
 772}
 773
 774static void kvmppc_xive_reset_sources(struct kvmppc_xive_src_block *sb)
 775{
 776        int i;
 777
 778        for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
 779                struct kvmppc_xive_irq_state *state = &sb->irq_state[i];
 780
 781                if (!state->valid)
 782                        continue;
 783
 784                if (state->act_priority == MASKED)
 785                        continue;
 786
 787                state->eisn = 0;
 788                state->act_server = 0;
 789                state->act_priority = MASKED;
 790                xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
 791                xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
 792                if (state->pt_number) {
 793                        xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_01);
 794                        xive_native_configure_irq(state->pt_number,
 795                                                  0, MASKED, 0);
 796                }
 797        }
 798}
 799
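     /*
      * KVM_DEV_XIVE_RESET: mask all valid sources and free the
      * escalation interrupts and queues of all vcpus, returning the
      * device to its initial state.
      */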
 800static int kvmppc_xive_reset(struct kvmppc_xive *xive)
 801{
 802        struct kvm *kvm = xive->kvm;
 803        struct kvm_vcpu *vcpu;
 804        unsigned int i;
 805
 806        pr_devel("%s\n", __func__);
 807
 808        mutex_lock(&xive->lock);
 809
 810        kvm_for_each_vcpu(i, vcpu, kvm) {
 811                struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
 812                unsigned int prio;
 813
 814                if (!xc)
 815                        continue;
 816
 817                kvmppc_xive_disable_vcpu_interrupts(vcpu);
 818
 819                for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
 820
 821                        /* Single escalation, no queue 7 */
 822                        if (prio == 7 && xive->single_escalation)
 823                                break;
 824
 825                        if (xc->esc_virq[prio]) {
 826                                free_irq(xc->esc_virq[prio], vcpu);
 827                                irq_dispose_mapping(xc->esc_virq[prio]);
 828                                kfree(xc->esc_virq_names[prio]);
 829                                xc->esc_virq[prio] = 0;
 830                        }
 831
 832                        kvmppc_xive_native_cleanup_queue(vcpu, prio);
 833                }
 834        }
 835
 836        for (i = 0; i <= xive->max_sbid; i++) {
 837                struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
 838
 839                if (sb) {
 840                        arch_spin_lock(&sb->lock);
 841                        kvmppc_xive_reset_sources(sb);
 842                        arch_spin_unlock(&sb->lock);
 843                }
 844        }
 845
 846        mutex_unlock(&xive->lock);
 847
 848        return 0;
 849}
 850
 851static void kvmppc_xive_native_sync_sources(struct kvmppc_xive_src_block *sb)
 852{
 853        int j;
 854
 855        for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++) {
 856                struct kvmppc_xive_irq_state *state = &sb->irq_state[j];
 857                struct xive_irq_data *xd;
 858                u32 hw_num;
 859
 860                if (!state->valid)
 861                        continue;
 862
 863                /*
 864                 * The struct kvmppc_xive_irq_state reflects the state
 865                 * of the EAS configuration and not the state of the
 866                 * source. The source is masked setting the PQ bits to
 867                 * '-Q', which is what is being done before calling
 868                 * the KVM_DEV_XIVE_EQ_SYNC control.
 869                 *
 870                 * If a source EAS is configured, OPAL syncs the XIVE
 871                 * IC of the source and the XIVE IC of the previous
 872                 * target if any.
 873                 *
 874                 * So it should be fine ignoring MASKED sources as
 875                 * they have been synced already.
 876                 */
 877                if (state->act_priority == MASKED)
 878                        continue;
 879
 880                kvmppc_xive_select_irq(state, &hw_num, &xd);
 881                xive_native_sync_source(hw_num);
 882                xive_native_sync_queue(hw_num);
 883        }
 884}
 885
 886static int kvmppc_xive_native_vcpu_eq_sync(struct kvm_vcpu *vcpu)
 887{
 888        struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
 889        unsigned int prio;
 890        int srcu_idx;
 891
 892        if (!xc)
 893                return -ENOENT;
 894
 895        for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
 896                struct xive_q *q = &xc->queues[prio];
 897
 898                if (!q->qpage)
 899                        continue;
 900
 901                /* Mark EQ page dirty for migration */
 902                srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 903                mark_page_dirty(vcpu->kvm, gpa_to_gfn(q->guest_qaddr));
 904                srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
 905        }
 906        return 0;
 907}
 908
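     /*
      * KVM_DEV_XIVE_EQ_SYNC: sync all sources and queues at the XIVE
      * IC level and mark the EQ pages of all vcpus dirty, so that a
      * consistent interrupt state can be transferred on migration.
      */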
 909static int kvmppc_xive_native_eq_sync(struct kvmppc_xive *xive)
 910{
 911        struct kvm *kvm = xive->kvm;
 912        struct kvm_vcpu *vcpu;
 913        unsigned int i;
 914
 915        pr_devel("%s\n", __func__);
 916
 917        mutex_lock(&xive->lock);
 918        for (i = 0; i <= xive->max_sbid; i++) {
 919                struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
 920
 921                if (sb) {
 922                        arch_spin_lock(&sb->lock);
 923                        kvmppc_xive_native_sync_sources(sb);
 924                        arch_spin_unlock(&sb->lock);
 925                }
 926        }
 927
 928        kvm_for_each_vcpu(i, vcpu, kvm) {
 929                kvmppc_xive_native_vcpu_eq_sync(vcpu);
 930        }
 931        mutex_unlock(&xive->lock);
 932
 933        return 0;
 934}
 935
 936static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
 937                                       struct kvm_device_attr *attr)
 938{
 939        struct kvmppc_xive *xive = dev->private;
 940
 941        switch (attr->group) {
 942        case KVM_DEV_XIVE_GRP_CTRL:
 943                switch (attr->attr) {
 944                case KVM_DEV_XIVE_RESET:
 945                        return kvmppc_xive_reset(xive);
 946                case KVM_DEV_XIVE_EQ_SYNC:
 947                        return kvmppc_xive_native_eq_sync(xive);
 948                case KVM_DEV_XIVE_NR_SERVERS:
 949                        return kvmppc_xive_set_nr_servers(xive, attr->addr);
 950                }
 951                break;
 952        case KVM_DEV_XIVE_GRP_SOURCE:
 953                return kvmppc_xive_native_set_source(xive, attr->attr,
 954                                                     attr->addr);
 955        case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
 956                return kvmppc_xive_native_set_source_config(xive, attr->attr,
 957                                                            attr->addr);
 958        case KVM_DEV_XIVE_GRP_EQ_CONFIG:
 959                return kvmppc_xive_native_set_queue_config(xive, attr->attr,
 960                                                           attr->addr);
 961        case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
 962                return kvmppc_xive_native_sync_source(xive, attr->attr,
 963                                                      attr->addr);
 964        }
 965        return -ENXIO;
 966}
 967
 968static int kvmppc_xive_native_get_attr(struct kvm_device *dev,
 969                                       struct kvm_device_attr *attr)
 970{
 971        struct kvmppc_xive *xive = dev->private;
 972
 973        switch (attr->group) {
 974        case KVM_DEV_XIVE_GRP_EQ_CONFIG:
 975                return kvmppc_xive_native_get_queue_config(xive, attr->attr,
 976                                                           attr->addr);
 977        }
 978        return -ENXIO;
 979}
 980
 981static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
 982                                       struct kvm_device_attr *attr)
 983{
 984        switch (attr->group) {
 985        case KVM_DEV_XIVE_GRP_CTRL:
 986                switch (attr->attr) {
 987                case KVM_DEV_XIVE_RESET:
 988                case KVM_DEV_XIVE_EQ_SYNC:
 989                case KVM_DEV_XIVE_NR_SERVERS:
 990                        return 0;
 991                }
 992                break;
 993        case KVM_DEV_XIVE_GRP_SOURCE:
 994        case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
 995        case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
 996                if (attr->attr >= KVMPPC_XIVE_FIRST_IRQ &&
 997                    attr->attr < KVMPPC_XIVE_NR_IRQS)
 998                        return 0;
 999                break;
1000        case KVM_DEV_XIVE_GRP_EQ_CONFIG:
1001                return 0;
1002        }
1003        return -ENXIO;
1004}
1005
1006/*
1007 * Called when device fd is closed.  kvm->lock is held.
1008 */
1009static void kvmppc_xive_native_release(struct kvm_device *dev)
1010{
1011        struct kvmppc_xive *xive = dev->private;
1012        struct kvm *kvm = xive->kvm;
1013        struct kvm_vcpu *vcpu;
1014        int i;
1015
1016        pr_devel("Releasing xive native device\n");
1017
1018        /*
1019         * Clear the KVM device file address_space which is used to
1020         * unmap the ESB pages when a device is passed-through.
1021         */
1022        mutex_lock(&xive->mapping_lock);
1023        xive->mapping = NULL;
1024        mutex_unlock(&xive->mapping_lock);
1025
1026        /*
1027         * Since this is the device release function, we know that
1028         * userspace does not have any open fd or mmap referring to
 1029         * the device.  Therefore none of the device attribute
 1030         * set/get, mmap, or page fault functions can be executing
 1031         * concurrently with this release, and similarly, the
 1032         * connect_vcpu and set/clr_mapped functions cannot be
 1033         * executing either.
1034         */
1035
1036        debugfs_remove(xive->dentry);
1037
1038        /*
1039         * We should clean up the vCPU interrupt presenters first.
1040         */
1041        kvm_for_each_vcpu(i, vcpu, kvm) {
1042                /*
1043                 * Take vcpu->mutex to ensure that no one_reg get/set ioctl
1044                 * (i.e. kvmppc_xive_native_[gs]et_vp) can be being done.
1045                 * Holding the vcpu->mutex also means that the vcpu cannot
1046                 * be executing the KVM_RUN ioctl, and therefore it cannot
1047                 * be executing the XIVE push or pull code or accessing
1048                 * the XIVE MMIO regions.
1049                 */
1050                mutex_lock(&vcpu->mutex);
1051                kvmppc_xive_native_cleanup_vcpu(vcpu);
1052                mutex_unlock(&vcpu->mutex);
1053        }
1054
1055        /*
1056         * Now that we have cleared vcpu->arch.xive_vcpu, vcpu->arch.irq_type
1057         * and vcpu->arch.xive_esc_[vr]addr on each vcpu, we are safe
1058         * against xive code getting called during vcpu execution or
1059         * set/get one_reg operations.
1060         */
1061        kvm->arch.xive = NULL;
1062
1063        for (i = 0; i <= xive->max_sbid; i++) {
1064                if (xive->src_blocks[i])
1065                        kvmppc_xive_free_sources(xive->src_blocks[i]);
1066                kfree(xive->src_blocks[i]);
1067                xive->src_blocks[i] = NULL;
1068        }
1069
1070        if (xive->vp_base != XIVE_INVALID_VP)
1071                xive_native_free_vp_block(xive->vp_base);
1072
1073        /*
 1074         * A reference to the kvmppc_xive pointer is kept under the
 1075         * xive_devices struct of the machine for reuse. For now, it
 1076         * is only freed when the VM is destroyed, until all the
 1077         * execution paths have been fixed.
1078         */
1079
1080        kfree(dev);
1081}
1082
1083/*
1084 * Create a XIVE device.  kvm->lock is held.
1085 */
1086static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type)
1087{
1088        struct kvmppc_xive *xive;
1089        struct kvm *kvm = dev->kvm;
1090
1091        pr_devel("Creating xive native device\n");
1092
1093        if (kvm->arch.xive)
1094                return -EEXIST;
1095
1096        xive = kvmppc_xive_get_device(kvm, type);
1097        if (!xive)
1098                return -ENOMEM;
1099
1100        dev->private = xive;
1101        xive->dev = dev;
1102        xive->kvm = kvm;
1103        mutex_init(&xive->mapping_lock);
1104        mutex_init(&xive->lock);
1105
1106        /* VP allocation is delayed to the first call to connect_vcpu */
1107        xive->vp_base = XIVE_INVALID_VP;
 1108        /* KVM_MAX_VCPUS limits the number of VMs to roughly 64 per socket
1109         * on a POWER9 system.
1110         */
1111        xive->nr_servers = KVM_MAX_VCPUS;
1112
1113        xive->single_escalation = xive_native_has_single_escalation();
1114        xive->ops = &kvmppc_xive_native_ops;
1115
1116        kvm->arch.xive = xive;
1117        return 0;
1118}
1119
1120/*
1121 * Interrupt Pending Buffer (IPB) offset
1122 */
1123#define TM_IPB_SHIFT 40
1124#define TM_IPB_MASK  (((u64) 0xFF) << TM_IPB_SHIFT)
1125
1126int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
1127{
1128        struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
1129        u64 opal_state;
1130        int rc;
1131
1132        if (!kvmppc_xive_enabled(vcpu))
1133                return -EPERM;
1134
1135        if (!xc)
1136                return -ENOENT;
1137
1138        /* Thread context registers. We only care about IPB and CPPR */
1139        val->xive_timaval[0] = vcpu->arch.xive_saved_state.w01;
1140
1141        /* Get the VP state from OPAL */
1142        rc = xive_native_get_vp_state(xc->vp_id, &opal_state);
1143        if (rc)
1144                return rc;
1145
1146        /*
1147         * Capture the backup of IPB register in the NVT structure and
1148         * merge it in our KVM VP state.
1149         */
1150        val->xive_timaval[0] |= cpu_to_be64(opal_state & TM_IPB_MASK);
1151
 1152        pr_devel("%s NSR=%02x CPPR=%02x IPB=%02x PIPR=%02x w01=%016llx w2=%08x opal=%016llx\n",
1153                 __func__,
1154                 vcpu->arch.xive_saved_state.nsr,
1155                 vcpu->arch.xive_saved_state.cppr,
1156                 vcpu->arch.xive_saved_state.ipb,
1157                 vcpu->arch.xive_saved_state.pipr,
1158                 vcpu->arch.xive_saved_state.w01,
1159                 (u32) vcpu->arch.xive_cam_word, opal_state);
1160
1161        return 0;
1162}
1163
1164int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
1165{
1166        struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
1167        struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
1168
1169        pr_devel("%s w01=%016llx vp=%016llx\n", __func__,
1170                 val->xive_timaval[0], val->xive_timaval[1]);
1171
1172        if (!kvmppc_xive_enabled(vcpu))
1173                return -EPERM;
1174
1175        if (!xc || !xive)
1176                return -ENOENT;
1177
1178        /* We can't update the state of a "pushed" VCPU  */
1179        if (WARN_ON(vcpu->arch.xive_pushed))
1180                return -EBUSY;
1181
1182        /*
1183         * Restore the thread context registers. IPB and CPPR should
1184         * be the only ones that matter.
1185         */
1186        vcpu->arch.xive_saved_state.w01 = val->xive_timaval[0];
1187
1188        /*
1189         * There is no need to restore the XIVE internal state (IPB
1190         * stored in the NVT) as the IPB register was merged in KVM VP
1191         * state when captured.
1192         */
1193        return 0;
1194}
1195
1196bool kvmppc_xive_native_supported(void)
1197{
1198        return xive_native_has_queue_state_support();
1199}
1200
1201static int xive_native_debug_show(struct seq_file *m, void *private)
1202{
1203        struct kvmppc_xive *xive = m->private;
1204        struct kvm *kvm = xive->kvm;
1205        struct kvm_vcpu *vcpu;
1206        unsigned int i;
1207
1208        if (!kvm)
1209                return 0;
1210
1211        seq_puts(m, "=========\nVCPU state\n=========\n");
1212
1213        kvm_for_each_vcpu(i, vcpu, kvm) {
1214                struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
1215
1216                if (!xc)
1217                        continue;
1218
1219                seq_printf(m, "VCPU %d: VP=%#x/%02x\n"
 1220                   "    NSR=%02x CPPR=%02x IPB=%02x PIPR=%02x w01=%016llx w2=%08x\n",
1221                           xc->server_num, xc->vp_id, xc->vp_chip_id,
1222                           vcpu->arch.xive_saved_state.nsr,
1223                           vcpu->arch.xive_saved_state.cppr,
1224                           vcpu->arch.xive_saved_state.ipb,
1225                           vcpu->arch.xive_saved_state.pipr,
1226                           be64_to_cpu(vcpu->arch.xive_saved_state.w01),
1227                           be32_to_cpu(vcpu->arch.xive_cam_word));
1228
1229                kvmppc_xive_debug_show_queues(m, vcpu);
1230        }
1231
1232        seq_puts(m, "=========\nSources\n=========\n");
1233
1234        for (i = 0; i <= xive->max_sbid; i++) {
1235                struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
1236
1237                if (sb) {
1238                        arch_spin_lock(&sb->lock);
1239                        kvmppc_xive_debug_show_sources(m, sb);
1240                        arch_spin_unlock(&sb->lock);
1241                }
1242        }
1243
1244        return 0;
1245}
1246
1247DEFINE_SHOW_ATTRIBUTE(xive_native_debug);
1248
1249static void xive_native_debugfs_init(struct kvmppc_xive *xive)
1250{
1251        char *name;
1252
1253        name = kasprintf(GFP_KERNEL, "kvm-xive-%p", xive);
1254        if (!name) {
1255                pr_err("%s: no memory for name\n", __func__);
1256                return;
1257        }
1258
1259        xive->dentry = debugfs_create_file(name, 0444, powerpc_debugfs_root,
1260                                           xive, &xive_native_debug_fops);
1261
1262        pr_debug("%s: created %s\n", __func__, name);
1263        kfree(name);
1264}
1265
1266static void kvmppc_xive_native_init(struct kvm_device *dev)
1267{
1268        struct kvmppc_xive *xive = (struct kvmppc_xive *)dev->private;
1269
1270        /* Register some debug interfaces */
1271        xive_native_debugfs_init(xive);
1272}
1273
1274struct kvm_device_ops kvm_xive_native_ops = {
1275        .name = "kvm-xive-native",
1276        .create = kvmppc_xive_native_create,
1277        .init = kvmppc_xive_native_init,
1278        .release = kvmppc_xive_native_release,
1279        .set_attr = kvmppc_xive_native_set_attr,
1280        .get_attr = kvmppc_xive_native_get_attr,
1281        .has_attr = kvmppc_xive_native_has_attr,
1282        .mmap = kvmppc_xive_native_mmap,
1283};
1284
1285void kvmppc_xive_native_init_module(void)
1286{
1287        ;
1288}
1289
1290void kvmppc_xive_native_exit_module(void)
1291{
1292        ;
1293}
1294