linux/arch/powerpc/kvm/book3s_xive_template.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2017 Benjamin Herrenschmidt, IBM Corporation
 */

/* File to be included by other .c files */

#define XGLUE(a,b) a##b
#define GLUE(a,b) XGLUE(a,b)
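
/*
 * GLUE(X_PFX,name) pastes the includer's X_PFX prefix onto each function
 * name. The .c file that includes this template is expected to define
 * X_PFX, X_STATIC, X_STAT_PFX and the __x_* accessors used below.
 */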

/* Dummy interrupt used when taking interrupts out of a queue in H_CPPR */
#define XICS_DUMMY      1

static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc)
{
        u8 cppr;
        u16 ack;

        /*
         * Ensure any previous store to CPPR is ordered vs.
         * the subsequent loads from PIPR or ACK.
         */
        eieio();

        /* Perform the acknowledge OS to register cycle. */
        ack = be16_to_cpu(__x_readw(__x_tima + TM_SPC_ACK_OS_REG));
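        /* The ACK cycle returns the NSR in the upper byte and the updated CPPR in the lower byte */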

        /* Synchronize subsequent queue accesses */
        mb();

        /* XXX Check grouping level */

        /* Anything ? */
        if (!((ack >> 8) & TM_QW1_NSR_EO))
                return;

        /* Grab CPPR of the most favored pending interrupt */
        cppr = ack & 0xff;
        if (cppr < 8)
                xc->pending |= 1 << cppr;

#ifdef XIVE_RUNTIME_CHECKS
        /* Check consistency */
        if (cppr >= xc->hw_cppr)
                pr_warn("KVM-XIVE: CPU %d odd ack CPPR, got %d at %d\n",
                        smp_processor_id(), cppr, xc->hw_cppr);
#endif

        /*
         * Update our image of the HW CPPR. We don't yet modify
         * xc->cppr, this will be done as we scan for interrupts
         * in the queues.
         */
        xc->hw_cppr = cppr;
}

static u8 GLUE(X_PFX,esb_load)(struct xive_irq_data *xd, u32 offset)
{
        u64 val;

        if (offset == XIVE_ESB_SET_PQ_10 && xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
                offset |= XIVE_ESB_LD_ST_MO;

        val = __x_readq(__x_eoi_page(xd) + offset);
#ifdef __LITTLE_ENDIAN__
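        /*
         * The raw (non-byteswapping) load returns big-endian data, so on a
         * little-endian host the PQ state lands in the most significant
         * byte; shift it down.
         */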
        val >>= 64-8;
#endif
        return (u8)val;
}


static void GLUE(X_PFX,source_eoi)(u32 hw_irq, struct xive_irq_data *xd)
{
        /* If the XIVE supports the new "store EOI" facility, use it */
        if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
                __x_writeq(0, __x_eoi_page(xd) + XIVE_ESB_STORE_EOI);
        else if (xd->flags & XIVE_IRQ_FLAG_LSI) {
                /*
                 * For LSIs the HW EOI cycle is used rather than PQ bits,
                 * as they are automatically re-triggered in HW when still
                 * pending.
                 */
                __x_readq(__x_eoi_page(xd) + XIVE_ESB_LOAD_EOI);
        } else {
                uint64_t eoi_val;

                /*
                 * Otherwise for EOI, we use the special MMIO that does
                 * a clear of both P and Q and returns the old Q,
                 * except for LSIs where we use the "EOI cycle" special
                 * load.
                 *
                 * This allows us to then do a re-trigger if Q was set
                 * rather than synthesizing an interrupt in software.
                 */
                eoi_val = GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_00);

                /* Re-trigger if needed */
                if ((eoi_val & 1) && __x_trig_page(xd))
                        __x_writeq(0, __x_trig_page(xd));
        }
}

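/*
 * Scan modes for scan_interrupts(): scan_fetch consumes interrupts and
 * updates the queue pointers, scan_poll only peeks at what is pending,
 * and scan_eoi re-evaluates the pending bits after an EOI without
 * adjusting the CPPR.
 */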
enum {
        scan_fetch,
        scan_poll,
        scan_eoi,
};

static u32 GLUE(X_PFX,scan_interrupts)(struct kvmppc_xive_vcpu *xc,
                                       u8 pending, int scan_type)
{
        u32 hirq = 0;
        u8 prio = 0xff;

        /* Find highest pending priority */
        while ((xc->mfrr != 0xff || pending != 0) && hirq == 0) {
                struct xive_q *q;
                u32 idx, toggle;
                __be32 *qpage;

                /*
                 * If pending is 0 this will return 0xff which is what
                 * we want.
                 */
                prio = ffs(pending) - 1;

                /* Don't scan past the guest cppr */
                if (prio >= xc->cppr || prio > 7) {
                        if (xc->mfrr < xc->cppr) {
                                prio = xc->mfrr;
                                hirq = XICS_IPI;
                        }
                        break;
                }

                /* Grab queue and pointers */
                q = &xc->queues[prio];
                idx = q->idx;
                toggle = q->toggle;

                /*
                 * Snapshot the queue page. The test further down for EOI
                 * must use the same "copy" that was used by __xive_read_eq
                 * since qpage can be set concurrently and we don't want
                 * to miss an EOI.
                 */
                qpage = READ_ONCE(q->qpage);

skip_ipi:
                /*
                 * Try to fetch from the queue. Will return 0 for a
                 * non-queueing priority (i.e. qpage is NULL).
                 */
                hirq = __xive_read_eq(qpage, q->msk, &idx, &toggle);

                /*
                 * If this was a signal for an MFRR change done by
                 * H_IPI we skip it. Additionally, if we were fetching
                 * we EOI it now, thus re-enabling reception of a new
                 * such signal.
                 *
                 * We also need to do that if prio is 0 and we had no
                 * page for the queue. In this case, we have a non-queued
                 * IPI that needs to be EOId.
                 *
                 * This is safe because if we have another pending MFRR
                 * change that wasn't observed above, the Q bit will have
                 * been set and another occurrence of the IPI will trigger.
                 */
                if (hirq == XICS_IPI || (prio == 0 && !qpage)) {
                        if (scan_type == scan_fetch) {
                                GLUE(X_PFX,source_eoi)(xc->vp_ipi,
                                                       &xc->vp_ipi_data);
                                q->idx = idx;
                                q->toggle = toggle;
                        }
                        /* Loop back on same queue with updated idx/toggle */
#ifdef XIVE_RUNTIME_CHECKS
                        WARN_ON(hirq && hirq != XICS_IPI);
#endif
                        if (hirq)
                                goto skip_ipi;
                }

                /* If it's the dummy interrupt, continue searching */
                if (hirq == XICS_DUMMY)
                        goto skip_ipi;

                /* Clear the pending bit if the queue is now empty */
                if (!hirq) {
                        pending &= ~(1 << prio);

                        /*
                         * Check if the queue count needs adjusting due to
                         * interrupts being moved away.
                         */
                        if (atomic_read(&q->pending_count)) {
                                int p = atomic_xchg(&q->pending_count, 0);
                                if (p) {
#ifdef XIVE_RUNTIME_CHECKS
                                        WARN_ON(p > atomic_read(&q->count));
#endif
                                        atomic_sub(p, &q->count);
                                }
                        }
                }

                /*
                 * If the most favored prio we found pending is equally
                 * or less favored than a pending IPI, we return the
                 * IPI instead.
                 */
                if (prio >= xc->mfrr && xc->mfrr < xc->cppr) {
                        prio = xc->mfrr;
                        hirq = XICS_IPI;
                        break;
                }

                /* If fetching, update queue pointers */
                if (scan_type == scan_fetch) {
                        q->idx = idx;
                        q->toggle = toggle;
                }
        }

        /* If we are just taking a "peek", do nothing else */
        if (scan_type == scan_poll)
                return hirq;

        /* Update the pending bits */
        xc->pending = pending;

        /*
         * If this is an EOI, that's it: no CPPR adjustment is done here,
         * all we needed was to clean up the stale pending bits and check
         * if there's anything left.
         */
        if (scan_type == scan_eoi)
                return hirq;

        /*
         * If we found an interrupt, adjust what the guest CPPR should
         * be as if we had just fetched that interrupt from HW.
         *
         * Note: This can only make xc->cppr smaller as the previous
         * loop will only exit with hirq != 0 if prio is lower than
         * the current xc->cppr. Thus we don't need to re-check xc->mfrr
         * for pending IPIs.
         */
        if (hirq)
                xc->cppr = prio;
        /*
         * If it was an IPI the HW CPPR might have been lowered too much
         * as the HW interrupt we use for IPIs is routed to priority 0.
         *
         * We re-sync it here.
         */
        if (xc->cppr != xc->hw_cppr) {
                xc->hw_cppr = xc->cppr;
                __x_writeb(xc->cppr, __x_tima + TM_QW1_OS + TM_CPPR);
        }

        return hirq;
}

X_STATIC unsigned long GLUE(X_PFX,h_xirr)(struct kvm_vcpu *vcpu)
{
        struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
        u8 old_cppr;
        u32 hirq;

        pr_devel("H_XIRR\n");

        xc->GLUE(X_STAT_PFX,h_xirr)++;

        /* First collect pending bits from HW */
        GLUE(X_PFX,ack_pending)(xc);

        pr_devel(" new pending=0x%02x hw_cppr=%d cppr=%d\n",
                 xc->pending, xc->hw_cppr, xc->cppr);

        /* Grab previous CPPR and reverse map it */
        old_cppr = xive_prio_to_guest(xc->cppr);

        /* Scan for actual interrupts */
        hirq = GLUE(X_PFX,scan_interrupts)(xc, xc->pending, scan_fetch);

        pr_devel(" got hirq=0x%x hw_cppr=%d cppr=%d\n",
                 hirq, xc->hw_cppr, xc->cppr);

#ifdef XIVE_RUNTIME_CHECKS
        /* That should never hit */
        if (hirq & 0xff000000)
                pr_warn("XIVE: Weird guest interrupt number 0x%08x\n", hirq);
#endif

        /*
         * XXX We could check if the interrupt is masked here and
         * filter it. If we chose to do so, we would need to do:
         *
         *    if (masked) {
         *        lock();
         *        if (masked) {
         *            old_Q = true;
         *            hirq = 0;
         *        }
         *        unlock();
         *    }
         */

        /* Return interrupt and old CPPR in GPR4 */
        vcpu->arch.regs.gpr[4] = hirq | (old_cppr << 24);

        return H_SUCCESS;
}

X_STATIC unsigned long GLUE(X_PFX,h_ipoll)(struct kvm_vcpu *vcpu, unsigned long server)
{
        struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
        u8 pending = xc->pending;
        u32 hirq;

        pr_devel("H_IPOLL(server=%ld)\n", server);

        xc->GLUE(X_STAT_PFX,h_ipoll)++;

        /* Grab the target VCPU if not the current one */
        if (xc->server_num != server) {
                vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
                if (!vcpu)
                        return H_PARAMETER;
                xc = vcpu->arch.xive_vcpu;

                /* Scan all priorities */
                pending = 0xff;
        } else {
                /* Grab pending interrupt if any */
                __be64 qw1 = __x_readq(__x_tima + TM_QW1_OS);
                u8 pipr = be64_to_cpu(qw1) & 0xff;
                if (pipr < 8)
                        pending |= 1 << pipr;
        }

        hirq = GLUE(X_PFX,scan_interrupts)(xc, pending, scan_poll);

        /* Return interrupt and old CPPR in GPR4 */
        vcpu->arch.regs.gpr[4] = hirq | (xc->cppr << 24);

        return H_SUCCESS;
}

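/*
 * Fold a pending MFRR into the pending bitmap and force the most favored
 * remaining priority into the OS pending state in the TIMA so the HW will
 * (re)present an interrupt for it.
 */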
static void GLUE(X_PFX,push_pending_to_hw)(struct kvmppc_xive_vcpu *xc)
{
        u8 pending, prio;

        pending = xc->pending;
        if (xc->mfrr != 0xff) {
                if (xc->mfrr < 8)
                        pending |= 1 << xc->mfrr;
                else
                        pending |= 0x80;
        }
        if (!pending)
                return;
        prio = ffs(pending) - 1;

        __x_writeb(prio, __x_tima + TM_SPC_SET_OS_PENDING);
}

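/*
 * Walk the queues that are now masked by the new CPPR, replace any
 * interrupt that has been re-routed to another server with XICS_DUMMY,
 * and EOI it so it gets re-triggered towards its new target.
 */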
static void GLUE(X_PFX,scan_for_rerouted_irqs)(struct kvmppc_xive *xive,
                                               struct kvmppc_xive_vcpu *xc)
{
        unsigned int prio;

        /* For each priority that is now masked */
        for (prio = xc->cppr; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
                struct xive_q *q = &xc->queues[prio];
                struct kvmppc_xive_irq_state *state;
                struct kvmppc_xive_src_block *sb;
                u32 idx, toggle, entry, irq, hw_num;
                struct xive_irq_data *xd;
                __be32 *qpage;
                u16 src;

                idx = q->idx;
                toggle = q->toggle;
                qpage = READ_ONCE(q->qpage);
                if (!qpage)
                        continue;

                /* For each interrupt in the queue */
                for (;;) {
                        entry = be32_to_cpup(qpage + idx);

                        /* No more ? */
                        if ((entry >> 31) == toggle)
                                break;
                        irq = entry & 0x7fffffff;

                        /* Skip dummies and IPIs */
                        if (irq == XICS_DUMMY || irq == XICS_IPI)
                                goto next;
                        sb = kvmppc_xive_find_source(xive, irq, &src);
                        if (!sb)
                                goto next;
                        state = &sb->irq_state[src];

                        /* Has it been rerouted ? */
                        if (xc->server_num == state->act_server)
                                goto next;

                        /*
                         * All right, it *has* been re-routed, kill it from
                         * the queue.
                         */
                        qpage[idx] = cpu_to_be32((entry & 0x80000000) | XICS_DUMMY);

                        /* Find the HW interrupt */
                        kvmppc_xive_select_irq(state, &hw_num, &xd);

                        /* If it's not an LSI, set PQ to 11 so the EOI will force a resend */
                        if (!(xd->flags & XIVE_IRQ_FLAG_LSI))
                                GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_11);

                        /* EOI the source */
                        GLUE(X_PFX,source_eoi)(hw_num, xd);

                        next:
                        idx = (idx + 1) & q->msk;
                        if (idx == 0)
                                toggle ^= 1;
                }
        }
}

X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr)
{
        struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
        struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
        u8 old_cppr;

        pr_devel("H_CPPR(cppr=%ld)\n", cppr);

        xc->GLUE(X_STAT_PFX,h_cppr)++;

        /* Map CPPR */
        cppr = xive_prio_from_guest(cppr);

        /* Remember old and update SW state */
        old_cppr = xc->cppr;
        xc->cppr = cppr;

        /*
         * Order the above update of xc->cppr with the subsequent
         * read of xc->mfrr inside push_pending_to_hw().
         */
        smp_mb();

        if (cppr > old_cppr) {
                /*
                 * We are masking less, so we need to look for pending things
                 * to deliver and set VP pending bits accordingly to trigger
                 * a new interrupt, otherwise we might miss MFRR changes for
                 * which we have optimized out sending an IPI signal.
                 */
                GLUE(X_PFX,push_pending_to_hw)(xc);
        } else {
                /*
                 * We are masking more, so we need to check the queue for any
                 * interrupt that has been routed to another CPU, take
                 * it out (replace it with the dummy) and retrigger it.
                 *
                 * This is necessary since those interrupts may otherwise
                 * never be processed, at least not until this CPU restores
                 * its CPPR.
                 *
                 * This is in theory racy vs. HW adding new interrupts to
                 * the queue. In practice this works because the interesting
                 * cases are when the guest has done a set_xive() to move the
                 * interrupt away, which flushes the xive, followed by the
                 * target CPU doing a H_CPPR. So any new interrupt coming into
                 * the queue must still be routed to us and isn't a source
                 * of concern.
                 */
                GLUE(X_PFX,scan_for_rerouted_irqs)(xive, xc);
        }

        /* Apply new CPPR */
        xc->hw_cppr = cppr;
        __x_writeb(cppr, __x_tima + TM_QW1_OS + TM_CPPR);

        return H_SUCCESS;
}

X_STATIC int GLUE(X_PFX,h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr)
{
        struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
        struct kvmppc_xive_src_block *sb;
        struct kvmppc_xive_irq_state *state;
        struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
        struct xive_irq_data *xd;
        u8 new_cppr = xirr >> 24;
        u32 irq = xirr & 0x00ffffff, hw_num;
        u16 src;
        int rc = 0;

        pr_devel("H_EOI(xirr=%08lx)\n", xirr);

        xc->GLUE(X_STAT_PFX,h_eoi)++;

        xc->cppr = xive_prio_from_guest(new_cppr);

        /*
         * IPIs are synthesized from MFRR and thus don't need
         * any special EOI handling. The underlying interrupt
         * used to signal MFRR changes is EOId when fetched from
         * the queue.
         */
        if (irq == XICS_IPI || irq == 0) {
                /*
                 * This barrier orders the setting of xc->cppr vs.
                 * the subsequent test of xc->mfrr done inside
                 * scan_interrupts and push_pending_to_hw.
                 */
                smp_mb();
                goto bail;
        }

        /* Find interrupt source */
        sb = kvmppc_xive_find_source(xive, irq, &src);
        if (!sb) {
                pr_devel(" source not found !\n");
                rc = H_PARAMETER;
                /* Same as above */
                smp_mb();
                goto bail;
        }
        state = &sb->irq_state[src];
        kvmppc_xive_select_irq(state, &hw_num, &xd);

        state->in_eoi = true;

        /*
         * This barrier orders both the setting of in_eoi above vs.
         * the subsequent test of guest_priority, and the setting
         * of xc->cppr vs. the subsequent test of xc->mfrr done inside
         * scan_interrupts and push_pending_to_hw.
         */
        smp_mb();

again:
        if (state->guest_priority == MASKED) {
                arch_spin_lock(&sb->lock);
                if (state->guest_priority != MASKED) {
                        arch_spin_unlock(&sb->lock);
                        goto again;
                }
                pr_devel(" EOI on saved P...\n");

                /* Clear old_p, that will cause unmask to perform an EOI */
                state->old_p = false;

                arch_spin_unlock(&sb->lock);
        } else {
                pr_devel(" EOI on source...\n");

                /* Perform EOI on the source */
                GLUE(X_PFX,source_eoi)(hw_num, xd);

                /* If it's an emulated LSI, check level and resend */
                if (state->lsi && state->asserted)
                        __x_writeq(0, __x_trig_page(xd));
        }

        /*
         * This barrier orders the above guest_priority check
         * and spin_lock/unlock with clearing in_eoi below.
         *
         * It also has to be a full mb() as it must ensure
         * the MMIOs done in source_eoi() are completed before
         * state->in_eoi is visible.
         */
        mb();
        state->in_eoi = false;
bail:

        /* Re-evaluate pending IRQs and update HW */
        GLUE(X_PFX,scan_interrupts)(xc, xc->pending, scan_eoi);
        GLUE(X_PFX,push_pending_to_hw)(xc);
        pr_devel(" after scan pending=%02x\n", xc->pending);

        /* Apply new CPPR */
        xc->hw_cppr = xc->cppr;
        __x_writeb(xc->cppr, __x_tima + TM_QW1_OS + TM_CPPR);

        return rc;
}

X_STATIC int GLUE(X_PFX,h_ipi)(struct kvm_vcpu *vcpu, unsigned long server,
                               unsigned long mfrr)
{
        struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;

        pr_devel("H_IPI(server=%08lx,mfrr=%ld)\n", server, mfrr);

        xc->GLUE(X_STAT_PFX,h_ipi)++;

        /* Find target */
        vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
        if (!vcpu)
                return H_PARAMETER;
        xc = vcpu->arch.xive_vcpu;

        /* Locklessly write over MFRR */
        xc->mfrr = mfrr;

        /*
         * The load of xc->cppr below and the subsequent MMIO store
         * to the IPI must happen after the above mfrr update is
         * globally visible so that:
         *
         * - We synchronize with another CPU doing an H_EOI or a H_CPPR
         *   that updates xc->cppr and then reads xc->mfrr.
         *
         * - The target of the IPI sees the xc->mfrr update.
         */
        mb();

        /* Shoot the IPI if more favored than the target CPPR */
        if (mfrr < xc->cppr)
                __x_writeq(0, __x_trig_page(&xc->vp_ipi_data));

        return H_SUCCESS;
}