linux/arch/powerpc/kvm/book3s_xive_native.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2017-2019, IBM Corporation.
 */

#define pr_fmt(fmt) "xive-kvm: " fmt

#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/spinlock.h>
#include <linux/delay.h>
#include <linux/file.h>
#include <linux/irqdomain.h>
#include <asm/uaccess.h>
#include <asm/kvm_book3s.h>
#include <asm/kvm_ppc.h>
#include <asm/hvcall.h>
#include <asm/xive.h>
#include <asm/xive-regs.h>
#include <asm/debug.h>
#include <asm/debugfs.h>
#include <asm/opal.h>

#include <linux/debugfs.h>
#include <linux/seq_file.h>

#include "book3s_xive.h"

static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset)
{
        u64 val;

        /*
         * The KVM XIVE native device does not use the XIVE_ESB_SET_PQ_10
         * load operation, so there is no need to enforce load-after-store
         * ordering.
         */

        val = in_be64(xd->eoi_mmio + offset);
        return (u8)val;
}

static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio)
{
        struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
        struct xive_q *q = &xc->queues[prio];

        xive_native_disable_queue(xc->vp_id, q, prio);
        if (q->qpage) {
                put_page(virt_to_page(q->qpage));
                q->qpage = NULL;
        }
}

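/*
 * Small wrapper around the OPAL queue configuration call: on success,
 * the reference on the previous queue page (if any) is dropped, so a
 * queue can be swapped or reset without leaking the old page.
 */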
static int kvmppc_xive_native_configure_queue(u32 vp_id, struct xive_q *q,
                                              u8 prio, __be32 *qpage,
                                              u32 order, bool can_escalate)
{
        int rc;
        __be32 *qpage_prev = q->qpage;

        rc = xive_native_configure_queue(vp_id, q, prio, qpage, order,
                                         can_escalate);
        if (rc)
                return rc;

        if (qpage_prev)
                put_page(virt_to_page(qpage_prev));

        return rc;
}

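/*
 * Tear down the XIVE state attached to a vCPU: mark the presenter
 * invalid, free the escalation interrupts, disable the VP, clear the
 * CAM word and release the event queues. Called on device release and
 * when connecting a vCPU fails.
 */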
void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
{
        struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
        int i;

        if (!kvmppc_xive_enabled(vcpu))
                return;

        if (!xc)
                return;

        pr_devel("native_cleanup_vcpu(cpu=%d)\n", xc->server_num);

        /* Ensure no interrupt is still routed to that VP */
        xc->valid = false;
        kvmppc_xive_disable_vcpu_interrupts(vcpu);

        /* Free escalations */
        for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
                /* Free the escalation irq */
                if (xc->esc_virq[i]) {
                        if (xc->xive->single_escalation)
                                xive_cleanup_single_escalation(vcpu, xc,
                                                        xc->esc_virq[i]);
                        free_irq(xc->esc_virq[i], vcpu);
                        irq_dispose_mapping(xc->esc_virq[i]);
                        kfree(xc->esc_virq_names[i]);
                        xc->esc_virq[i] = 0;
                }
        }

        /* Disable the VP */
        xive_native_disable_vp(xc->vp_id);

        /* Clear the cam word so guest entry won't try to push context */
        vcpu->arch.xive_cam_word = 0;

        /* Free the queues */
        for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++)
                kvmppc_xive_native_cleanup_queue(vcpu, i);

        /* Free the VP */
        kfree(xc);

        /* Cleanup the vcpu */
        vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
        vcpu->arch.xive_vcpu = NULL;
}

int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
                                    struct kvm_vcpu *vcpu, u32 server_num)
{
        struct kvmppc_xive *xive = dev->private;
        struct kvmppc_xive_vcpu *xc = NULL;
        int rc;
        u32 vp_id;

        pr_devel("native_connect_vcpu(server=%d)\n", server_num);

        if (dev->ops != &kvm_xive_native_ops) {
                pr_devel("Wrong ops !\n");
                return -EPERM;
        }
        if (xive->kvm != vcpu->kvm)
                return -EPERM;
        if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
                return -EBUSY;

        mutex_lock(&xive->lock);

        rc = kvmppc_xive_compute_vp_id(xive, server_num, &vp_id);
        if (rc)
                goto bail;

        xc = kzalloc(sizeof(*xc), GFP_KERNEL);
        if (!xc) {
                rc = -ENOMEM;
                goto bail;
        }

        vcpu->arch.xive_vcpu = xc;
        xc->xive = xive;
        xc->vcpu = vcpu;
        xc->server_num = server_num;

        xc->vp_id = vp_id;
        xc->valid = true;
        vcpu->arch.irq_type = KVMPPC_IRQ_XIVE;

        rc = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id);
        if (rc) {
                pr_err("Failed to get VP info from OPAL: %d\n", rc);
                goto bail;
        }

        /*
         * Enable the VP first, as single escalation mode affects the
         * numbering of the escalation interrupts.
         */
        rc = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
        if (rc) {
                pr_err("Failed to enable VP in OPAL: %d\n", rc);
                goto bail;
        }

        /* Configure VCPU fields for use by assembly push/pull */
        vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000);
        vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO);

        /* TODO: reset all queues to a clean state ? */
bail:
        mutex_unlock(&xive->lock);
        if (rc)
                kvmppc_xive_native_cleanup_vcpu(vcpu);

        return rc;
}

/*
 * Device passthrough support
 */
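/*
 * Called through the kvmppc_xive_ops when the HW interrupt backing a
 * guest IRQ is set or cleared by the passthrough code, so that any
 * stale ESB mapping in the guest address space is invalidated and
 * faulted in again with the right pages.
 */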
static int kvmppc_xive_native_reset_mapped(struct kvm *kvm, unsigned long irq)
{
        struct kvmppc_xive *xive = kvm->arch.xive;
        pgoff_t esb_pgoff = KVM_XIVE_ESB_PAGE_OFFSET + irq * 2;

        if (irq >= KVMPPC_XIVE_NR_IRQS)
                return -EINVAL;

        /*
         * Clear the ESB pages of the IRQ number being mapped (or
         * unmapped) into the guest and let the VM fault handler
         * repopulate with the appropriate ESB pages (device or IC).
         */
        pr_debug("clearing esb pages for girq 0x%lx\n", irq);
        mutex_lock(&xive->mapping_lock);
        if (xive->mapping)
                unmap_mapping_range(xive->mapping,
                                    esb_pgoff << PAGE_SHIFT,
                                    2ull << PAGE_SHIFT, 1);
        mutex_unlock(&xive->mapping_lock);
        return 0;
}

static struct kvmppc_xive_ops kvmppc_xive_native_ops = {
        .reset_mapped = kvmppc_xive_native_reset_mapped,
};

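/*
 * Fault handler for the ESB region of the device mapping. Each guest
 * IRQ owns two pages: relative to KVM_XIVE_ESB_PAGE_OFFSET, guest IRQ
 * N uses page offset 2 * N for the trigger page and 2 * N + 1 for the
 * EOI/management page. For example, guest IRQ 0x10 maps to relative
 * page offsets 0x20 and 0x21.
 */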
static vm_fault_t xive_native_esb_fault(struct vm_fault *vmf)
{
        struct vm_area_struct *vma = vmf->vma;
        struct kvm_device *dev = vma->vm_file->private_data;
        struct kvmppc_xive *xive = dev->private;
        struct kvmppc_xive_src_block *sb;
        struct kvmppc_xive_irq_state *state;
        struct xive_irq_data *xd;
        u32 hw_num;
        u16 src;
        u64 page;
        unsigned long irq;
        u64 page_offset;

        /*
         * Linux/KVM uses a two-page ESB setting: one page for trigger
         * and one for EOI.
         */
        page_offset = vmf->pgoff - vma->vm_pgoff;
        irq = page_offset / 2;

        sb = kvmppc_xive_find_source(xive, irq, &src);
        if (!sb) {
                pr_devel("%s: source %lx not found !\n", __func__, irq);
                return VM_FAULT_SIGBUS;
        }

        state = &sb->irq_state[src];

        /* Some sanity checking */
        if (!state->valid) {
                pr_devel("%s: source %lx invalid !\n", __func__, irq);
                return VM_FAULT_SIGBUS;
        }

        kvmppc_xive_select_irq(state, &hw_num, &xd);

        arch_spin_lock(&sb->lock);

        /*
         * first/even page is for trigger
         * second/odd page is for EOI and management.
         */
        page = page_offset % 2 ? xd->eoi_page : xd->trig_page;
        arch_spin_unlock(&sb->lock);

        if (WARN_ON(!page)) {
                pr_err("%s: accessing invalid ESB page for source %lx !\n",
                       __func__, irq);
                return VM_FAULT_SIGBUS;
        }

        vmf_insert_pfn(vma, vmf->address, page >> PAGE_SHIFT);
        return VM_FAULT_NOPAGE;
}

static const struct vm_operations_struct xive_native_esb_vmops = {
        .fault = xive_native_esb_fault,
};

static vm_fault_t xive_native_tima_fault(struct vm_fault *vmf)
{
        struct vm_area_struct *vma = vmf->vma;

        switch (vmf->pgoff - vma->vm_pgoff) {
        case 0: /* HW - forbid access */
        case 1: /* HV - forbid access */
                return VM_FAULT_SIGBUS;
        case 2: /* OS */
                vmf_insert_pfn(vma, vmf->address, xive_tima_os >> PAGE_SHIFT);
                return VM_FAULT_NOPAGE;
        case 3: /* USER - TODO */
        default:
                return VM_FAULT_SIGBUS;
        }
}

static const struct vm_operations_struct xive_native_tima_vmops = {
        .fault = xive_native_tima_fault,
};

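/*
 * mmap handler for the device fd. Only two fixed regions are
 * supported: the thread interrupt management pages at
 * KVM_XIVE_TIMA_PAGE_OFFSET and the source ESB pages at
 * KVM_XIVE_ESB_PAGE_OFFSET. As a rough sketch (not taken from this
 * file), a VMM running with 64K pages could map the TIMA region like
 * this, 'xive_fd' being the device fd returned by KVM_CREATE_DEVICE:
 *
 *   void *tima = mmap(NULL, 4 * 0x10000, PROT_READ | PROT_WRITE,
 *                     MAP_SHARED, xive_fd,
 *                     KVM_XIVE_TIMA_PAGE_OFFSET * 0x10000);
 *
 * Only the third (OS) page of that region is backed; the HW and HV
 * pages fault with SIGBUS.
 */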
static int kvmppc_xive_native_mmap(struct kvm_device *dev,
                                   struct vm_area_struct *vma)
{
        struct kvmppc_xive *xive = dev->private;

        /* We only allow mappings at fixed offset for now */
        if (vma->vm_pgoff == KVM_XIVE_TIMA_PAGE_OFFSET) {
                if (vma_pages(vma) > 4)
                        return -EINVAL;
                vma->vm_ops = &xive_native_tima_vmops;
        } else if (vma->vm_pgoff == KVM_XIVE_ESB_PAGE_OFFSET) {
                if (vma_pages(vma) > KVMPPC_XIVE_NR_IRQS * 2)
                        return -EINVAL;
                vma->vm_ops = &xive_native_esb_vmops;
        } else {
                return -EINVAL;
        }

        vma->vm_flags |= VM_IO | VM_PFNMAP;
        vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);

        /*
         * Grab the KVM device file address_space to be able to clear
         * the ESB page mappings when a device is passed through into
         * the guest.
         */
        xive->mapping = vma->vm_file->f_mapping;
        return 0;
}

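/*
 * KVM_DEV_XIVE_GRP_SOURCE: create or update a guest interrupt source.
 * An IPI is allocated lazily the first time a source is set, the
 * LSI/asserted state is restored from the attribute value, and the
 * source starts off masked until a source configuration is applied.
 */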
static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq,
                                         u64 addr)
{
        struct kvmppc_xive_src_block *sb;
        struct kvmppc_xive_irq_state *state;
        u64 __user *ubufp = (u64 __user *) addr;
        u64 val;
        u16 idx;
        int rc;

        pr_devel("%s irq=0x%lx\n", __func__, irq);

        if (irq < KVMPPC_XIVE_FIRST_IRQ || irq >= KVMPPC_XIVE_NR_IRQS)
                return -E2BIG;

        sb = kvmppc_xive_find_source(xive, irq, &idx);
        if (!sb) {
                pr_debug("No source, creating source block...\n");
                sb = kvmppc_xive_create_src_block(xive, irq);
                if (!sb) {
                        pr_err("Failed to create block...\n");
                        return -ENOMEM;
                }
        }
        state = &sb->irq_state[idx];

        if (get_user(val, ubufp)) {
                pr_err("fault getting user info !\n");
                return -EFAULT;
        }

        arch_spin_lock(&sb->lock);

        /*
         * If the source doesn't already have an IPI, allocate
         * one and get the corresponding data
         */
        if (!state->ipi_number) {
                state->ipi_number = xive_native_alloc_irq();
                if (state->ipi_number == 0) {
                        pr_err("Failed to allocate IRQ !\n");
                        rc = -ENXIO;
                        goto unlock;
                }
                xive_native_populate_irq_data(state->ipi_number,
                                              &state->ipi_data);
                pr_debug("%s allocated hw_irq=0x%x for irq=0x%lx\n", __func__,
                         state->ipi_number, irq);
        }

        /* Restore LSI state */
        if (val & KVM_XIVE_LEVEL_SENSITIVE) {
                state->lsi = true;
                if (val & KVM_XIVE_LEVEL_ASSERTED)
                        state->asserted = true;
                pr_devel("  LSI ! Asserted=%d\n", state->asserted);
        }

        /* Mask IRQ to start with */
        state->act_server = 0;
        state->act_priority = MASKED;
        xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
        xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);

        /* Increment the number of valid sources and mark this one valid */
        if (!state->valid)
                xive->src_count++;
        state->valid = true;

        rc = 0;

unlock:
        arch_spin_unlock(&sb->lock);

        return rc;
}

static int kvmppc_xive_native_update_source_config(struct kvmppc_xive *xive,
                                        struct kvmppc_xive_src_block *sb,
                                        struct kvmppc_xive_irq_state *state,
                                        u32 server, u8 priority, bool masked,
                                        u32 eisn)
{
        struct kvm *kvm = xive->kvm;
        u32 hw_num;
        int rc = 0;

        arch_spin_lock(&sb->lock);

        if (state->act_server == server && state->act_priority == priority &&
            state->eisn == eisn)
                goto unlock;

        pr_devel("new_act_prio=%d new_act_server=%d mask=%d act_server=%d act_prio=%d\n",
                 priority, server, masked, state->act_server,
                 state->act_priority);

        kvmppc_xive_select_irq(state, &hw_num, NULL);

        if (priority != MASKED && !masked) {
                rc = kvmppc_xive_select_target(kvm, &server, priority);
                if (rc)
                        goto unlock;

                state->act_priority = priority;
                state->act_server = server;
                state->eisn = eisn;

                rc = xive_native_configure_irq(hw_num,
                                               kvmppc_xive_vp(xive, server),
                                               priority, eisn);
        } else {
                state->act_priority = MASKED;
                state->act_server = 0;
                state->eisn = 0;

                rc = xive_native_configure_irq(hw_num, 0, MASKED, 0);
        }

unlock:
        arch_spin_unlock(&sb->lock);
        return rc;
}

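/*
 * KVM_DEV_XIVE_GRP_SOURCE_CONFIG: route a source to a server/priority.
 * The 64-bit attribute value packs the target priority, server (vCPU
 * id), masked state and EISN, which are extracted below with the
 * KVM_XIVE_SOURCE_* masks and shifts.
 */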
static int kvmppc_xive_native_set_source_config(struct kvmppc_xive *xive,
                                                long irq, u64 addr)
{
        struct kvmppc_xive_src_block *sb;
        struct kvmppc_xive_irq_state *state;
        u64 __user *ubufp = (u64 __user *) addr;
        u16 src;
        u64 kvm_cfg;
        u32 server;
        u8 priority;
        bool masked;
        u32 eisn;

        sb = kvmppc_xive_find_source(xive, irq, &src);
        if (!sb)
                return -ENOENT;

        state = &sb->irq_state[src];

        if (!state->valid)
                return -EINVAL;

        if (get_user(kvm_cfg, ubufp))
                return -EFAULT;

        pr_devel("%s irq=0x%lx cfg=%016llx\n", __func__, irq, kvm_cfg);

        priority = (kvm_cfg & KVM_XIVE_SOURCE_PRIORITY_MASK) >>
                KVM_XIVE_SOURCE_PRIORITY_SHIFT;
        server = (kvm_cfg & KVM_XIVE_SOURCE_SERVER_MASK) >>
                KVM_XIVE_SOURCE_SERVER_SHIFT;
        masked = (kvm_cfg & KVM_XIVE_SOURCE_MASKED_MASK) >>
                KVM_XIVE_SOURCE_MASKED_SHIFT;
        eisn = (kvm_cfg & KVM_XIVE_SOURCE_EISN_MASK) >>
                KVM_XIVE_SOURCE_EISN_SHIFT;

        if (priority != xive_prio_from_guest(priority)) {
                pr_err("invalid priority for queue %d for VCPU %d\n",
                       priority, server);
                return -EINVAL;
        }

        return kvmppc_xive_native_update_source_config(xive, sb, state, server,
                                                       priority, masked, eisn);
}

static int kvmppc_xive_native_sync_source(struct kvmppc_xive *xive,
                                          long irq, u64 addr)
{
        struct kvmppc_xive_src_block *sb;
        struct kvmppc_xive_irq_state *state;
        struct xive_irq_data *xd;
        u32 hw_num;
        u16 src;
        int rc = 0;

        pr_devel("%s irq=0x%lx\n", __func__, irq);

        sb = kvmppc_xive_find_source(xive, irq, &src);
        if (!sb)
                return -ENOENT;

        state = &sb->irq_state[src];

        rc = -EINVAL;

        arch_spin_lock(&sb->lock);

        if (state->valid) {
                kvmppc_xive_select_irq(state, &hw_num, &xd);
                xive_native_sync_source(hw_num);
                rc = 0;
        }

        arch_spin_unlock(&sb->lock);
        return rc;
}

static int xive_native_validate_queue_size(u32 qshift)
{
        /*
         * We only support 64K pages for the moment. This is also
         * advertised in the DT property "ibm,xive-eq-sizes"
         */
        switch (qshift) {
        case 0: /* EQ reset */
        case 16:
                return 0;
        case 12:
        case 21:
        case 24:
        default:
                return -EINVAL;
        }
}

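/*
 * KVM_DEV_XIVE_GRP_EQ_CONFIG (set): configure or reset one event
 * queue. The priority/server pair is demangled from the EQ
 * identifier, the queue size and alignment are validated, the guest
 * queue page is translated and handed to OPAL, then the
 * qtoggle/qindex state is restored (only meaningful when migrating)
 * and the escalation interrupt is attached.
 */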
static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive,
                                               long eq_idx, u64 addr)
{
        struct kvm *kvm = xive->kvm;
        struct kvm_vcpu *vcpu;
        struct kvmppc_xive_vcpu *xc;
        void __user *ubufp = (void __user *) addr;
        u32 server;
        u8 priority;
        struct kvm_ppc_xive_eq kvm_eq;
        int rc;
        __be32 *qaddr = 0;
        struct page *page;
        struct xive_q *q;
        gfn_t gfn;
        unsigned long page_size;
        int srcu_idx;

        /*
         * Demangle priority/server tuple from the EQ identifier
         */
        priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
                KVM_XIVE_EQ_PRIORITY_SHIFT;
        server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
                KVM_XIVE_EQ_SERVER_SHIFT;

        if (copy_from_user(&kvm_eq, ubufp, sizeof(kvm_eq)))
                return -EFAULT;

        vcpu = kvmppc_xive_find_server(kvm, server);
        if (!vcpu) {
                pr_err("Can't find server %d\n", server);
                return -ENOENT;
        }
        xc = vcpu->arch.xive_vcpu;

        if (priority != xive_prio_from_guest(priority)) {
                pr_err("Trying to restore invalid queue %d for VCPU %d\n",
                       priority, server);
                return -EINVAL;
        }
        q = &xc->queues[priority];

        pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
                 __func__, server, priority, kvm_eq.flags,
                 kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);

        /* reset queue and disable queueing */
        if (!kvm_eq.qshift) {
                q->guest_qaddr  = 0;
                q->guest_qshift = 0;

                rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority,
                                                        NULL, 0, true);
                if (rc) {
                        pr_err("Failed to reset queue %d for VCPU %d: %d\n",
                               priority, xc->server_num, rc);
                        return rc;
                }

                return 0;
        }

        /*
         * sPAPR specifies an "Unconditional Notify (n) flag" for the
         * H_INT_SET_QUEUE_CONFIG hcall which forces notification
         * without using the coalescing mechanisms provided by the
         * XIVE END ESBs. This is required on KVM as notification
         * using the END ESBs is not supported.
         */
        if (kvm_eq.flags != KVM_XIVE_EQ_ALWAYS_NOTIFY) {
                pr_err("invalid flags %d\n", kvm_eq.flags);
                return -EINVAL;
        }

        rc = xive_native_validate_queue_size(kvm_eq.qshift);
        if (rc) {
                pr_err("invalid queue size %d\n", kvm_eq.qshift);
                return rc;
        }

        if (kvm_eq.qaddr & ((1ull << kvm_eq.qshift) - 1)) {
                pr_err("queue page is not aligned %llx/%llx\n", kvm_eq.qaddr,
                       1ull << kvm_eq.qshift);
                return -EINVAL;
        }

        srcu_idx = srcu_read_lock(&kvm->srcu);
        gfn = gpa_to_gfn(kvm_eq.qaddr);

        page_size = kvm_host_page_size(vcpu, gfn);
        if (1ull << kvm_eq.qshift > page_size) {
                srcu_read_unlock(&kvm->srcu, srcu_idx);
                pr_warn("Incompatible host page size %lx!\n", page_size);
                return -EINVAL;
        }

        page = gfn_to_page(kvm, gfn);
        if (is_error_page(page)) {
                srcu_read_unlock(&kvm->srcu, srcu_idx);
                pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr);
                return -EINVAL;
        }

        qaddr = page_to_virt(page) + (kvm_eq.qaddr & ~PAGE_MASK);
        srcu_read_unlock(&kvm->srcu, srcu_idx);

        /*
         * Back up the queue page guest address so that the EQ page
         * can be marked dirty for migration.
         */
        q->guest_qaddr  = kvm_eq.qaddr;
        q->guest_qshift = kvm_eq.qshift;

        /*
         * Unconditional Notification is forced by default at the
         * OPAL level because the use of END ESBs is not supported by
         * Linux.
         */
        rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority,
                                        (__be32 *) qaddr, kvm_eq.qshift, true);
        if (rc) {
                pr_err("Failed to configure queue %d for VCPU %d: %d\n",
                       priority, xc->server_num, rc);
                put_page(page);
                return rc;
        }

        /*
         * Only restore the queue state when needed. When doing the
         * H_INT_SET_QUEUE_CONFIG hcall, it should not.
         */
        if (kvm_eq.qtoggle != 1 || kvm_eq.qindex != 0) {
                rc = xive_native_set_queue_state(xc->vp_id, priority,
                                                 kvm_eq.qtoggle,
                                                 kvm_eq.qindex);
                if (rc)
                        goto error;
        }

        rc = kvmppc_xive_attach_escalation(vcpu, priority,
                                           xive->single_escalation);
error:
        if (rc)
                kvmppc_xive_native_cleanup_queue(vcpu, priority);
        return rc;
}

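/*
 * KVM_DEV_XIVE_GRP_EQ_CONFIG (get): return the current configuration
 * and qtoggle/qindex state of one event queue, typically used to
 * capture the EQs on the source side of a migration.
 */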
static int kvmppc_xive_native_get_queue_config(struct kvmppc_xive *xive,
                                               long eq_idx, u64 addr)
{
        struct kvm *kvm = xive->kvm;
        struct kvm_vcpu *vcpu;
        struct kvmppc_xive_vcpu *xc;
        struct xive_q *q;
        void __user *ubufp = (u64 __user *) addr;
        u32 server;
        u8 priority;
        struct kvm_ppc_xive_eq kvm_eq;
        u64 qaddr;
        u64 qshift;
        u64 qeoi_page;
        u32 escalate_irq;
        u64 qflags;
        int rc;

        /*
         * Demangle priority/server tuple from the EQ identifier
         */
        priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
                KVM_XIVE_EQ_PRIORITY_SHIFT;
        server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
                KVM_XIVE_EQ_SERVER_SHIFT;

        vcpu = kvmppc_xive_find_server(kvm, server);
        if (!vcpu) {
                pr_err("Can't find server %d\n", server);
                return -ENOENT;
        }
        xc = vcpu->arch.xive_vcpu;

        if (priority != xive_prio_from_guest(priority)) {
                pr_err("invalid priority for queue %d for VCPU %d\n",
                       priority, server);
                return -EINVAL;
        }
        q = &xc->queues[priority];

        memset(&kvm_eq, 0, sizeof(kvm_eq));

        if (!q->qpage)
                return 0;

        rc = xive_native_get_queue_info(xc->vp_id, priority, &qaddr, &qshift,
                                        &qeoi_page, &escalate_irq, &qflags);
        if (rc)
                return rc;

        kvm_eq.flags = 0;
        if (qflags & OPAL_XIVE_EQ_ALWAYS_NOTIFY)
                kvm_eq.flags |= KVM_XIVE_EQ_ALWAYS_NOTIFY;

        kvm_eq.qshift = q->guest_qshift;
        kvm_eq.qaddr  = q->guest_qaddr;

        rc = xive_native_get_queue_state(xc->vp_id, priority, &kvm_eq.qtoggle,
                                         &kvm_eq.qindex);
        if (rc)
                return rc;

        pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
                 __func__, server, priority, kvm_eq.flags,
                 kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);

        if (copy_to_user(ubufp, &kvm_eq, sizeof(kvm_eq)))
                return -EFAULT;

        return 0;
}

static void kvmppc_xive_reset_sources(struct kvmppc_xive_src_block *sb)
{
        int i;

        for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
                struct kvmppc_xive_irq_state *state = &sb->irq_state[i];

                if (!state->valid)
                        continue;

                if (state->act_priority == MASKED)
                        continue;

                state->eisn = 0;
                state->act_server = 0;
                state->act_priority = MASKED;
                xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
                xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
                if (state->pt_number) {
                        xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_01);
                        xive_native_configure_irq(state->pt_number,
                                                  0, MASKED, 0);
                }
        }
}

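/*
 * KVM_DEV_XIVE_RESET: return the device to a clean state by masking
 * all sources, releasing the escalation interrupts and freeing the
 * event queues of every vCPU.
 */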
static int kvmppc_xive_reset(struct kvmppc_xive *xive)
{
        struct kvm *kvm = xive->kvm;
        struct kvm_vcpu *vcpu;
        unsigned int i;

        pr_devel("%s\n", __func__);

        mutex_lock(&xive->lock);

        kvm_for_each_vcpu(i, vcpu, kvm) {
                struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
                unsigned int prio;

                if (!xc)
                        continue;

                kvmppc_xive_disable_vcpu_interrupts(vcpu);

                for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {

                        /* Single escalation, no queue 7 */
                        if (prio == 7 && xive->single_escalation)
                                break;

                        if (xc->esc_virq[prio]) {
                                free_irq(xc->esc_virq[prio], vcpu);
                                irq_dispose_mapping(xc->esc_virq[prio]);
                                kfree(xc->esc_virq_names[prio]);
                                xc->esc_virq[prio] = 0;
                        }

                        kvmppc_xive_native_cleanup_queue(vcpu, prio);
                }
        }

        for (i = 0; i <= xive->max_sbid; i++) {
                struct kvmppc_xive_src_block *sb = xive->src_blocks[i];

                if (sb) {
                        arch_spin_lock(&sb->lock);
                        kvmppc_xive_reset_sources(sb);
                        arch_spin_unlock(&sb->lock);
                }
        }

        mutex_unlock(&xive->lock);

        return 0;
}

static void kvmppc_xive_native_sync_sources(struct kvmppc_xive_src_block *sb)
{
        int j;

        for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++) {
                struct kvmppc_xive_irq_state *state = &sb->irq_state[j];
                struct xive_irq_data *xd;
                u32 hw_num;

                if (!state->valid)
                        continue;

                /*
                 * The struct kvmppc_xive_irq_state reflects the state
                 * of the EAS configuration and not the state of the
                 * source. The source is masked by setting the PQ bits
                 * to '-Q', which is what is done before calling the
                 * KVM_DEV_XIVE_EQ_SYNC control.
                 *
                 * If a source EAS is configured, OPAL syncs the XIVE
                 * IC of the source and the XIVE IC of the previous
                 * target if any.
                 *
                 * So it is fine to ignore MASKED sources here, as
                 * they have already been synced.
                 */
                if (state->act_priority == MASKED)
                        continue;

                kvmppc_xive_select_irq(state, &hw_num, &xd);
                xive_native_sync_source(hw_num);
                xive_native_sync_queue(hw_num);
        }
}

static int kvmppc_xive_native_vcpu_eq_sync(struct kvm_vcpu *vcpu)
{
        struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
        unsigned int prio;
        int srcu_idx;

        if (!xc)
                return -ENOENT;

        for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
                struct xive_q *q = &xc->queues[prio];

                if (!q->qpage)
                        continue;

                /* Mark EQ page dirty for migration */
                srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
                mark_page_dirty(vcpu->kvm, gpa_to_gfn(q->guest_qaddr));
                srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
        }
        return 0;
}

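/*
 * KVM_DEV_XIVE_EQ_SYNC: flush in-flight interrupts to the event
 * queues and mark the EQ pages dirty, so that a migration captures a
 * consistent image of the queues.
 */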
static int kvmppc_xive_native_eq_sync(struct kvmppc_xive *xive)
{
        struct kvm *kvm = xive->kvm;
        struct kvm_vcpu *vcpu;
        unsigned int i;

        pr_devel("%s\n", __func__);

        mutex_lock(&xive->lock);
        for (i = 0; i <= xive->max_sbid; i++) {
                struct kvmppc_xive_src_block *sb = xive->src_blocks[i];

                if (sb) {
                        arch_spin_lock(&sb->lock);
                        kvmppc_xive_native_sync_sources(sb);
                        arch_spin_unlock(&sb->lock);
                }
        }

        kvm_for_each_vcpu(i, vcpu, kvm)
                kvmppc_xive_native_vcpu_eq_sync(vcpu);
        mutex_unlock(&xive->lock);

        return 0;
}

static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
                                       struct kvm_device_attr *attr)
{
        struct kvmppc_xive *xive = dev->private;

        switch (attr->group) {
        case KVM_DEV_XIVE_GRP_CTRL:
                switch (attr->attr) {
                case KVM_DEV_XIVE_RESET:
                        return kvmppc_xive_reset(xive);
                case KVM_DEV_XIVE_EQ_SYNC:
                        return kvmppc_xive_native_eq_sync(xive);
                case KVM_DEV_XIVE_NR_SERVERS:
                        return kvmppc_xive_set_nr_servers(xive, attr->addr);
                }
                break;
        case KVM_DEV_XIVE_GRP_SOURCE:
                return kvmppc_xive_native_set_source(xive, attr->attr,
                                                     attr->addr);
        case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
                return kvmppc_xive_native_set_source_config(xive, attr->attr,
                                                            attr->addr);
        case KVM_DEV_XIVE_GRP_EQ_CONFIG:
                return kvmppc_xive_native_set_queue_config(xive, attr->attr,
                                                           attr->addr);
        case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
                return kvmppc_xive_native_sync_source(xive, attr->attr,
                                                      attr->addr);
        }
        return -ENXIO;
}

static int kvmppc_xive_native_get_attr(struct kvm_device *dev,
                                       struct kvm_device_attr *attr)
{
        struct kvmppc_xive *xive = dev->private;

        switch (attr->group) {
        case KVM_DEV_XIVE_GRP_EQ_CONFIG:
                return kvmppc_xive_native_get_queue_config(xive, attr->attr,
                                                           attr->addr);
        }
        return -ENXIO;
}

static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
                                       struct kvm_device_attr *attr)
{
        switch (attr->group) {
        case KVM_DEV_XIVE_GRP_CTRL:
                switch (attr->attr) {
                case KVM_DEV_XIVE_RESET:
                case KVM_DEV_XIVE_EQ_SYNC:
                case KVM_DEV_XIVE_NR_SERVERS:
                        return 0;
                }
                break;
        case KVM_DEV_XIVE_GRP_SOURCE:
        case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
        case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
                if (attr->attr >= KVMPPC_XIVE_FIRST_IRQ &&
                    attr->attr < KVMPPC_XIVE_NR_IRQS)
                        return 0;
                break;
        case KVM_DEV_XIVE_GRP_EQ_CONFIG:
                return 0;
        }
        return -ENXIO;
}

/*
 * Called when device fd is closed.  kvm->lock is held.
 */
static void kvmppc_xive_native_release(struct kvm_device *dev)
{
        struct kvmppc_xive *xive = dev->private;
        struct kvm *kvm = xive->kvm;
        struct kvm_vcpu *vcpu;
        int i;

        pr_devel("Releasing xive native device\n");

        /*
         * Clear the KVM device file address_space which is used to
         * unmap the ESB pages when a device is passed-through.
         */
        mutex_lock(&xive->mapping_lock);
        xive->mapping = NULL;
        mutex_unlock(&xive->mapping_lock);

        /*
         * Since this is the device release function, we know that
         * userspace does not have any open fd or mmap referring to
         * the device.  Therefore none of the device attribute
         * set/get, mmap, or page fault functions can be executing
         * concurrently, and similarly the connect_vcpu and
         * set/clr_mapped functions cannot be executing either.
         */

        debugfs_remove(xive->dentry);

        /*
         * We should clean up the vCPU interrupt presenters first.
         */
        kvm_for_each_vcpu(i, vcpu, kvm) {
                /*
                 * Take vcpu->mutex to ensure that no one_reg get/set ioctl
                 * (i.e. kvmppc_xive_native_[gs]et_vp) can be in progress.
                 * Holding the vcpu->mutex also means that the vcpu cannot
                 * be executing the KVM_RUN ioctl, and therefore it cannot
                 * be executing the XIVE push or pull code or accessing
                 * the XIVE MMIO regions.
                 */
                mutex_lock(&vcpu->mutex);
                kvmppc_xive_native_cleanup_vcpu(vcpu);
                mutex_unlock(&vcpu->mutex);
        }

        /*
         * Now that we have cleared vcpu->arch.xive_vcpu, vcpu->arch.irq_type
         * and vcpu->arch.xive_esc_[vr]addr on each vcpu, we are safe
         * against xive code getting called during vcpu execution or
         * set/get one_reg operations.
         */
        kvm->arch.xive = NULL;

        for (i = 0; i <= xive->max_sbid; i++) {
                if (xive->src_blocks[i])
                        kvmppc_xive_free_sources(xive->src_blocks[i]);
                kfree(xive->src_blocks[i]);
                xive->src_blocks[i] = NULL;
        }

        if (xive->vp_base != XIVE_INVALID_VP)
                xive_native_free_vp_block(xive->vp_base);

        /*
         * The kvmppc_xive structure is kept under the xive_devices
         * struct of the machine for reuse. For now it is only freed
         * when the VM is destroyed, until all the execution paths
         * have been fixed.
         */

        kfree(dev);
}

/*
 * Create a XIVE device.  kvm->lock is held.
 */
static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type)
{
        struct kvmppc_xive *xive;
        struct kvm *kvm = dev->kvm;

        pr_devel("Creating xive native device\n");

        if (kvm->arch.xive)
                return -EEXIST;

        xive = kvmppc_xive_get_device(kvm, type);
        if (!xive)
                return -ENOMEM;

        dev->private = xive;
        xive->dev = dev;
        xive->kvm = kvm;
        mutex_init(&xive->mapping_lock);
        mutex_init(&xive->lock);

        /* VP allocation is delayed to the first call to connect_vcpu */
        xive->vp_base = XIVE_INVALID_VP;
        /*
         * KVM_MAX_VCPUS limits the number of VMs to roughly 64 per
         * socket on a POWER9 system.
         */
        xive->nr_servers = KVM_MAX_VCPUS;

        xive->single_escalation = xive_native_has_single_escalation();
        xive->ops = &kvmppc_xive_native_ops;

        kvm->arch.xive = xive;
        return 0;
}

/*
 * Interrupt Pending Buffer (IPB) offset
 */
#define TM_IPB_SHIFT 40
#define TM_IPB_MASK  (((u64) 0xFF) << TM_IPB_SHIFT)

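/*
 * one_reg accessors for the vCPU XIVE state: the saved thread context
 * word w01 (which holds the IPB and CPPR among others) is exchanged
 * with userspace, with the IPB backup kept by OPAL in the NVT merged
 * in on the get side.
 */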
int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
{
        struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
        u64 opal_state;
        int rc;

        if (!kvmppc_xive_enabled(vcpu))
                return -EPERM;

        if (!xc)
                return -ENOENT;

        /* Thread context registers. We only care about IPB and CPPR */
        val->xive_timaval[0] = vcpu->arch.xive_saved_state.w01;

        /* Get the VP state from OPAL */
        rc = xive_native_get_vp_state(xc->vp_id, &opal_state);
        if (rc)
                return rc;

        /*
         * Capture the backup of the IPB register in the NVT structure
         * and merge it into our KVM VP state.
         */
        val->xive_timaval[0] |= cpu_to_be64(opal_state & TM_IPB_MASK);

        pr_devel("%s NSR=%02x CPPR=%02x IPB=%02x PIPR=%02x w01=%016llx w2=%08x opal=%016llx\n",
                 __func__,
                 vcpu->arch.xive_saved_state.nsr,
                 vcpu->arch.xive_saved_state.cppr,
                 vcpu->arch.xive_saved_state.ipb,
                 vcpu->arch.xive_saved_state.pipr,
                 vcpu->arch.xive_saved_state.w01,
                 (u32) vcpu->arch.xive_cam_word, opal_state);

        return 0;
}

int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
{
        struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
        struct kvmppc_xive *xive = vcpu->kvm->arch.xive;

        pr_devel("%s w01=%016llx vp=%016llx\n", __func__,
                 val->xive_timaval[0], val->xive_timaval[1]);

        if (!kvmppc_xive_enabled(vcpu))
                return -EPERM;

        if (!xc || !xive)
                return -ENOENT;

        /* We can't update the state of a "pushed" VCPU */
        if (WARN_ON(vcpu->arch.xive_pushed))
                return -EBUSY;

        /*
         * Restore the thread context registers. IPB and CPPR should
         * be the only ones that matter.
         */
        vcpu->arch.xive_saved_state.w01 = val->xive_timaval[0];

        /*
         * There is no need to restore the XIVE internal state (IPB
         * stored in the NVT) as the IPB register was merged in KVM VP
         * state when captured.
         */
        return 0;
}

bool kvmppc_xive_native_supported(void)
{
        return xive_native_has_queue_state_support();
}

static int xive_native_debug_show(struct seq_file *m, void *private)
{
        struct kvmppc_xive *xive = m->private;
        struct kvm *kvm = xive->kvm;
        struct kvm_vcpu *vcpu;
        unsigned int i;

        if (!kvm)
                return 0;

        seq_puts(m, "=========\nVCPU state\n=========\n");

        kvm_for_each_vcpu(i, vcpu, kvm) {
                struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;

                if (!xc)
                        continue;

                seq_printf(m, "VCPU %d: VP=%#x/%02x\n"
                           "    NSR=%02x CPPR=%02x IPB=%02x PIPR=%02x w01=%016llx w2=%08x\n",
                           xc->server_num, xc->vp_id, xc->vp_chip_id,
                           vcpu->arch.xive_saved_state.nsr,
                           vcpu->arch.xive_saved_state.cppr,
                           vcpu->arch.xive_saved_state.ipb,
                           vcpu->arch.xive_saved_state.pipr,
                           be64_to_cpu(vcpu->arch.xive_saved_state.w01),
                           be32_to_cpu(vcpu->arch.xive_cam_word));

                kvmppc_xive_debug_show_queues(m, vcpu);
        }

        seq_puts(m, "=========\nSources\n=========\n");

        for (i = 0; i <= xive->max_sbid; i++) {
                struct kvmppc_xive_src_block *sb = xive->src_blocks[i];

                if (sb) {
                        arch_spin_lock(&sb->lock);
                        kvmppc_xive_debug_show_sources(m, sb);
                        arch_spin_unlock(&sb->lock);
                }
        }

        return 0;
}

DEFINE_SHOW_ATTRIBUTE(xive_native_debug);

static void xive_native_debugfs_init(struct kvmppc_xive *xive)
{
        char *name;

        name = kasprintf(GFP_KERNEL, "kvm-xive-%p", xive);
        if (!name) {
                pr_err("%s: no memory for name\n", __func__);
                return;
        }

        xive->dentry = debugfs_create_file(name, 0444, powerpc_debugfs_root,
                                           xive, &xive_native_debug_fops);

        pr_debug("%s: created %s\n", __func__, name);
        kfree(name);
}

static void kvmppc_xive_native_init(struct kvm_device *dev)
{
        struct kvmppc_xive *xive = (struct kvmppc_xive *)dev->private;

        /* Register some debug interfaces */
        xive_native_debugfs_init(xive);
}

struct kvm_device_ops kvm_xive_native_ops = {
        .name = "kvm-xive-native",
        .create = kvmppc_xive_native_create,
        .init = kvmppc_xive_native_init,
        .release = kvmppc_xive_native_release,
        .set_attr = kvmppc_xive_native_set_attr,
        .get_attr = kvmppc_xive_native_get_attr,
        .has_attr = kvmppc_xive_native_has_attr,
        .mmap = kvmppc_xive_native_mmap,
};
