linux/arch/powerpc/sysdev/xive/common.c
   1/*
   2 * Copyright 2016,2017 IBM Corporation.
   3 *
   4 * This program is free software; you can redistribute it and/or
   5 * modify it under the terms of the GNU General Public License
   6 * as published by the Free Software Foundation; either version
   7 * 2 of the License, or (at your option) any later version.
   8 */
   9
  10#define pr_fmt(fmt) "xive: " fmt
  11
  12#include <linux/types.h>
  13#include <linux/threads.h>
  14#include <linux/kernel.h>
  15#include <linux/irq.h>
  16#include <linux/debugfs.h>
  17#include <linux/smp.h>
  18#include <linux/interrupt.h>
  19#include <linux/seq_file.h>
  20#include <linux/init.h>
  21#include <linux/cpu.h>
  22#include <linux/of.h>
  23#include <linux/slab.h>
  24#include <linux/spinlock.h>
  25#include <linux/msi.h>
  26#include <linux/vmalloc.h>
  27
  28#include <asm/debugfs.h>
  29#include <asm/prom.h>
  30#include <asm/io.h>
  31#include <asm/smp.h>
  32#include <asm/machdep.h>
  33#include <asm/irq.h>
  34#include <asm/errno.h>
  35#include <asm/xive.h>
  36#include <asm/xive-regs.h>
  37#include <asm/xmon.h>
  38
  39#include "xive-internal.h"
  40
  41#undef DEBUG_FLUSH
  42#undef DEBUG_ALL
  43
  44#ifdef DEBUG_ALL
  45#define DBG_VERBOSE(fmt, ...)   pr_devel("cpu %d - " fmt, \
  46                                         smp_processor_id(), ## __VA_ARGS__)
  47#else
  48#define DBG_VERBOSE(fmt...)     do { } while(0)
  49#endif
  50
  51bool __xive_enabled;
  52EXPORT_SYMBOL_GPL(__xive_enabled);
  53bool xive_cmdline_disabled;
  54
  55/* We use only one priority for now */
  56static u8 xive_irq_priority;
  57
  58/* TIMA exported to KVM */
  59void __iomem *xive_tima;
  60EXPORT_SYMBOL_GPL(xive_tima);
  61u32 xive_tima_offset;
  62
  63/* Backend ops */
  64static const struct xive_ops *xive_ops;
  65
  66/* Our global interrupt domain */
  67static struct irq_domain *xive_irq_domain;
  68
  69#ifdef CONFIG_SMP
  70/* The IPIs all use the same logical irq number */
  71static u32 xive_ipi_irq;
  72#endif
  73
  74/* Xive state for each CPU */
  75static DEFINE_PER_CPU(struct xive_cpu *, xive_cpu);
  76
  77/* An invalid CPU target */
  78#define XIVE_INVALID_TARGET     (-1)
  79
  80/*
  81 * Read the next entry in a queue, return its content if it's valid
  82 * or 0 if there is no new entry.
  83 *
  84 * The queue pointer is moved forward unless "just_peek" is set
  85 */
  86static u32 xive_read_eq(struct xive_q *q, bool just_peek)
  87{
  88        u32 cur;
  89
  90        if (!q->qpage)
  91                return 0;
  92        cur = be32_to_cpup(q->qpage + q->idx);
  93
  94        /* Check valid bit (31) vs current toggle polarity */
  95        if ((cur >> 31) == q->toggle)
  96                return 0;
  97
  98        /* If consuming from the queue ... */
  99        if (!just_peek) {
 100                /* Next entry */
 101                q->idx = (q->idx + 1) & q->msk;
 102
 103                /* Wrap around: flip valid toggle */
 104                if (q->idx == 0)
 105                        q->toggle ^= 1;
 106        }
 107        /* Mask out the valid bit (31) */
 108        return cur & 0x7fffffff;
 109}
 110
 111/*
  112 * Scans all the queues that may have interrupts in them
 113 * (based on "pending_prio") in priority order until an
 114 * interrupt is found or all the queues are empty.
 115 *
 116 * Then updates the CPPR (Current Processor Priority
 117 * Register) based on the most favored interrupt found
  118 * (0xff if none) and returns what was found (0 if none).
 119 *
 120 * If just_peek is set, return the most favored pending
 121 * interrupt if any but don't update the queue pointers.
 122 *
 123 * Note: This function can operate generically on any number
 124 * of queues (up to 8). The current implementation of the XIVE
 125 * driver only uses a single queue however.
 126 *
  127 * Note2: This will also "flush" the "pending_count" of a queue
 128 * into the "count" when that queue is observed to be empty.
 129 * This is used to keep track of the amount of interrupts
  130 * targeting a queue. When an interrupt is moved away from
 131 * a queue, we only decrement that queue count once the queue
 132 * has been observed empty to avoid races.
 133 */
 134static u32 xive_scan_interrupts(struct xive_cpu *xc, bool just_peek)
 135{
 136        u32 irq = 0;
 137        u8 prio = 0;
 138
 139        /* Find highest pending priority */
 140        while (xc->pending_prio != 0) {
 141                struct xive_q *q;
 142
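                     /*
                      * ffs() returns the lowest set bit, 1-based, hence the
                      * "- 1"; lower priority numbers are the most favored.
                      */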
 143                prio = ffs(xc->pending_prio) - 1;
 144                DBG_VERBOSE("scan_irq: trying prio %d\n", prio);
 145
 146                /* Try to fetch */
 147                irq = xive_read_eq(&xc->queue[prio], just_peek);
 148
 149                /* Found something ? That's it */
 150                if (irq) {
 151                        if (just_peek || irq_to_desc(irq))
 152                                break;
 153                        /*
 154                         * We should never get here; if we do then we must
 155                         * have failed to synchronize the interrupt properly
 156                         * when shutting it down.
 157                         */
  158                        pr_crit("got interrupt %d without descriptor, dropping\n",
 159                                irq);
 160                        WARN_ON(1);
 161                        continue;
 162                }
 163
 164                /* Clear pending bits */
 165                xc->pending_prio &= ~(1 << prio);
 166
 167                /*
 168                 * Check if the queue count needs adjusting due to
 169                 * interrupts being moved away. See description of
 170                 * xive_dec_target_count()
 171                 */
 172                q = &xc->queue[prio];
 173                if (atomic_read(&q->pending_count)) {
 174                        int p = atomic_xchg(&q->pending_count, 0);
 175                        if (p) {
 176                                WARN_ON(p > atomic_read(&q->count));
 177                                atomic_sub(p, &q->count);
 178                        }
 179                }
 180        }
 181
 182        /* If nothing was found, set CPPR to 0xff */
 183        if (irq == 0)
 184                prio = 0xff;
 185
 186        /* Update HW CPPR to match if necessary */
 187        if (prio != xc->cppr) {
 188                DBG_VERBOSE("scan_irq: adjusting CPPR to %d\n", prio);
 189                xc->cppr = prio;
 190                out_8(xive_tima + xive_tima_offset + TM_CPPR, prio);
 191        }
 192
 193        return irq;
 194}
 195
 196/*
 197 * This is used to perform the magic loads from an ESB
 198 * described in xive.h
 199 */
 200static notrace u8 xive_esb_read(struct xive_irq_data *xd, u32 offset)
 201{
 202        u64 val;
 203
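             /*
              * With StoreEOI, a PQ=10 load must be ordered after the
              * preceding EOI store; the XIVE_ESB_LD_ST_MO offset bit asks
              * the ESB for that load-after-store ordering.
              */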
 204        if (offset == XIVE_ESB_SET_PQ_10 && xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
 205                offset |= XIVE_ESB_LD_ST_MO;
 206
 207        /* Handle HW errata */
 208        if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG)
 209                offset |= offset << 4;
 210
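             /*
              * Some sources cannot have their ESB accessed by MMIO and must
              * go through the backend (e.g. a hypervisor call on pseries),
              * as flagged by XIVE_IRQ_FLAG_H_INT_ESB.
              */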
 211        if ((xd->flags & XIVE_IRQ_FLAG_H_INT_ESB) && xive_ops->esb_rw)
 212                val = xive_ops->esb_rw(xd->hw_irq, offset, 0, 0);
 213        else
 214                val = in_be64(xd->eoi_mmio + offset);
 215
 216        return (u8)val;
 217}
 218
 219static void xive_esb_write(struct xive_irq_data *xd, u32 offset, u64 data)
 220{
 221        /* Handle HW errata */
 222        if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG)
 223                offset |= offset << 4;
 224
 225        if ((xd->flags & XIVE_IRQ_FLAG_H_INT_ESB) && xive_ops->esb_rw)
 226                xive_ops->esb_rw(xd->hw_irq, offset, data, 1);
 227        else
 228                out_be64(xd->eoi_mmio + offset, data);
 229}
 230
 231#ifdef CONFIG_XMON
 232static notrace void xive_dump_eq(const char *name, struct xive_q *q)
 233{
 234        u32 i0, i1, idx;
 235
 236        if (!q->qpage)
 237                return;
 238        idx = q->idx;
 239        i0 = be32_to_cpup(q->qpage + idx);
 240        idx = (idx + 1) & q->msk;
 241        i1 = be32_to_cpup(q->qpage + idx);
 242        xmon_printf("%s idx=%d T=%d %08x %08x ...", name,
 243                     q->idx, q->toggle, i0, i1);
 244}
 245
 246notrace void xmon_xive_do_dump(int cpu)
 247{
 248        struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
 249
 250        xmon_printf("CPU %d:", cpu);
 251        if (xc) {
 252                xmon_printf("pp=%02x CPPR=%02x ", xc->pending_prio, xc->cppr);
 253
 254#ifdef CONFIG_SMP
 255                {
 256                        u64 val = xive_esb_read(&xc->ipi_data, XIVE_ESB_GET);
 257
 258                        xmon_printf("IPI=0x%08x PQ=%c%c ", xc->hw_ipi,
 259                                    val & XIVE_ESB_VAL_P ? 'P' : '-',
 260                                    val & XIVE_ESB_VAL_Q ? 'Q' : '-');
 261                }
 262#endif
 263                xive_dump_eq("EQ", &xc->queue[xive_irq_priority]);
 264        }
 265        xmon_printf("\n");
 266}
 267
 268int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d)
 269{
 270        int rc;
 271        u32 target;
 272        u8 prio;
 273        u32 lirq;
 274
 275        rc = xive_ops->get_irq_config(hw_irq, &target, &prio, &lirq);
 276        if (rc) {
 277                xmon_printf("IRQ 0x%08x : no config rc=%d\n", hw_irq, rc);
 278                return rc;
 279        }
 280
 281        xmon_printf("IRQ 0x%08x : target=0x%x prio=%02x lirq=0x%x ",
 282                    hw_irq, target, prio, lirq);
 283
 284        if (d) {
 285                struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
 286                u64 val = xive_esb_read(xd, XIVE_ESB_GET);
 287
 288                xmon_printf("PQ=%c%c",
 289                            val & XIVE_ESB_VAL_P ? 'P' : '-',
 290                            val & XIVE_ESB_VAL_Q ? 'Q' : '-');
 291        }
 292
 293        xmon_printf("\n");
 294        return 0;
 295}
 296
 297#endif /* CONFIG_XMON */
 298
 299static unsigned int xive_get_irq(void)
 300{
 301        struct xive_cpu *xc = __this_cpu_read(xive_cpu);
 302        u32 irq;
 303
 304        /*
 305         * This can be called either as a result of a HW interrupt or
 306         * as a "replay" because EOI decided there was still something
 307         * in one of the queues.
 308         *
 309         * First we perform an ACK cycle in order to update our mask
 310         * of pending priorities. This will also have the effect of
 311         * updating the CPPR to the most favored pending interrupts.
 312         *
 313         * In the future, if we have a way to differentiate a first
 314         * entry (on HW interrupt) from a replay triggered by EOI,
  315         * we could skip this on replays unless the soft-mask state tells us
 316         * that a new HW interrupt occurred.
 317         */
 318        xive_ops->update_pending(xc);
 319
 320        DBG_VERBOSE("get_irq: pending=%02x\n", xc->pending_prio);
 321
 322        /* Scan our queue(s) for interrupts */
 323        irq = xive_scan_interrupts(xc, false);
 324
 325        DBG_VERBOSE("get_irq: got irq 0x%x, new pending=0x%02x\n",
 326            irq, xc->pending_prio);
 327
 328        /* Return pending interrupt if any */
 329        if (irq == XIVE_BAD_IRQ)
 330                return 0;
 331        return irq;
 332}
 333
 334/*
 335 * After EOI'ing an interrupt, we need to re-check the queue
 336 * to see if another interrupt is pending since multiple
 337 * interrupts can coalesce into a single notification to the
 338 * CPU.
 339 *
 340 * If we find that there is indeed more in there, we call
  341 * force_external_irq_replay() to make Linux synthesize an
 342 * external interrupt on the next call to local_irq_restore().
 343 */
 344static void xive_do_queue_eoi(struct xive_cpu *xc)
 345{
 346        if (xive_scan_interrupts(xc, true) != 0) {
 347                DBG_VERBOSE("eoi: pending=0x%02x\n", xc->pending_prio);
 348                force_external_irq_replay();
 349        }
 350}
 351
 352/*
 353 * EOI an interrupt at the source. There are several methods
 354 * to do this depending on the HW version and source type
 355 */
 356void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
 357{
 358        xd->stale_p = false;
  359        /* If the XIVE supports the new "store EOI" facility, use it */
 360        if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
 361                xive_esb_write(xd, XIVE_ESB_STORE_EOI, 0);
 362        else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW) {
 363                /*
 364                 * The FW told us to call it. This happens for some
 365                 * interrupt sources that need additional HW whacking
 366                 * beyond the ESB manipulation. For example LPC interrupts
  367                 * on P9 DD1.0 needed a latch to be cleared in the LPC bridge
 368                 * itself. The Firmware will take care of it.
 369                 */
 370                if (WARN_ON_ONCE(!xive_ops->eoi))
 371                        return;
 372                xive_ops->eoi(hw_irq);
 373        } else {
 374                u8 eoi_val;
 375
 376                /*
 377                 * Otherwise for EOI, we use the special MMIO that does
 378                 * a clear of both P and Q and returns the old Q,
 379                 * except for LSIs where we use the "EOI cycle" special
 380                 * load.
 381                 *
 382                 * This allows us to then do a re-trigger if Q was set
 383                 * rather than synthesizing an interrupt in software
 384                 *
 385                 * For LSIs the HW EOI cycle is used rather than PQ bits,
  386                 * as they are automatically re-triggered in HW when still
 387                 * pending.
 388                 */
 389                if (xd->flags & XIVE_IRQ_FLAG_LSI)
 390                        xive_esb_read(xd, XIVE_ESB_LOAD_EOI);
 391                else {
 392                        eoi_val = xive_esb_read(xd, XIVE_ESB_SET_PQ_00);
 393                        DBG_VERBOSE("eoi_val=%x\n", eoi_val);
 394
 395                        /* Re-trigger if needed */
 396                        if ((eoi_val & XIVE_ESB_VAL_Q) && xd->trig_mmio)
 397                                out_be64(xd->trig_mmio, 0);
 398                }
 399        }
 400}
 401
 402/* irq_chip eoi callback, called with irq descriptor lock held */
 403static void xive_irq_eoi(struct irq_data *d)
 404{
 405        struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
 406        struct xive_cpu *xc = __this_cpu_read(xive_cpu);
 407
 408        DBG_VERBOSE("eoi_irq: irq=%d [0x%lx] pending=%02x\n",
 409                    d->irq, irqd_to_hwirq(d), xc->pending_prio);
 410
 411        /*
 412         * EOI the source if it hasn't been disabled and hasn't
 413         * been passed-through to a KVM guest
 414         */
 415        if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d) &&
 416            !(xd->flags & XIVE_IRQ_NO_EOI))
 417                xive_do_source_eoi(irqd_to_hwirq(d), xd);
 418        else
 419                xd->stale_p = true;
 420
 421        /*
 422         * Clear saved_p to indicate that it's no longer occupying
 423         * a queue slot on the target queue
 424         */
 425        xd->saved_p = false;
 426
 427        /* Check for more work in the queue */
 428        xive_do_queue_eoi(xc);
 429}
 430
 431/*
 432 * Helper used to mask and unmask an interrupt source. This
 433 * is only called for normal interrupts that do not require
 434 * masking/unmasking via firmware.
 435 */
 436static void xive_do_source_set_mask(struct xive_irq_data *xd,
 437                                    bool mask)
 438{
 439        u64 val;
 440
 441        /*
 442         * If the interrupt had P set, it may be in a queue.
 443         *
 444         * We need to make sure we don't re-enable it until it
 445         * has been fetched from that queue and EOId. We keep
 446         * a copy of that P state and use it to restore the
 447         * ESB accordingly on unmask.
 448         */
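             /*
              * ESB PQ states used here: 01 = off/masked, 10 = pending
              * (P set, no new notification until EOI), 00 = enabled and
              * idle.
              */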
 449        if (mask) {
 450                val = xive_esb_read(xd, XIVE_ESB_SET_PQ_01);
 451                if (!xd->stale_p && !!(val & XIVE_ESB_VAL_P))
 452                        xd->saved_p = true;
 453                xd->stale_p = false;
 454        } else if (xd->saved_p) {
 455                xive_esb_read(xd, XIVE_ESB_SET_PQ_10);
 456                xd->saved_p = false;
 457        } else {
 458                xive_esb_read(xd, XIVE_ESB_SET_PQ_00);
 459                xd->stale_p = false;
 460        }
 461}
 462
 463/*
  464 * Try to choose "cpu" as a new interrupt target. Increments
 465 * the queue accounting for that target if it's not already
 466 * full.
 467 */
 468static bool xive_try_pick_target(int cpu)
 469{
 470        struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
 471        struct xive_q *q = &xc->queue[xive_irq_priority];
 472        int max;
 473
 474        /*
 475         * Calculate max number of interrupts in that queue.
 476         *
 477         * We leave a gap of 1 just in case...
 478         */
 479        max = (q->msk + 1) - 1;
 480        return !!atomic_add_unless(&q->count, 1, max);
 481}
 482
 483/*
 484 * Un-account an interrupt for a target CPU. We don't directly
 485 * decrement q->count since the interrupt might still be present
 486 * in the queue.
 487 *
 488 * Instead increment a separate counter "pending_count" which
  489 * will be subtracted from "count" later when that CPU observes
 490 * the queue to be empty.
 491 */
 492static void xive_dec_target_count(int cpu)
 493{
 494        struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
 495        struct xive_q *q = &xc->queue[xive_irq_priority];
 496
 497        if (unlikely(WARN_ON(cpu < 0 || !xc))) {
 498                pr_err("%s: cpu=%d xc=%p\n", __func__, cpu, xc);
 499                return;
 500        }
 501
 502        /*
 503         * We increment the "pending count" which will be used
 504         * to decrement the target queue count whenever it's next
  505         * processed and found empty. This ensures that we don't
 506         * decrement while we still have the interrupt there
 507         * occupying a slot.
 508         */
 509        atomic_inc(&q->pending_count);
 510}
 511
 512/* Find a tentative CPU target in a CPU mask */
 513static int xive_find_target_in_mask(const struct cpumask *mask,
 514                                    unsigned int fuzz)
 515{
 516        int cpu, first, num, i;
 517
  518        /* Pick up a starting point CPU in the mask based on fuzz */
 519        num = min_t(int, cpumask_weight(mask), nr_cpu_ids);
 520        first = fuzz % num;
 521
 522        /* Locate it */
 523        cpu = cpumask_first(mask);
 524        for (i = 0; i < first && cpu < nr_cpu_ids; i++)
 525                cpu = cpumask_next(cpu, mask);
 526
 527        /* Sanity check */
 528        if (WARN_ON(cpu >= nr_cpu_ids))
 529                cpu = cpumask_first(cpu_online_mask);
 530
 531        /* Remember first one to handle wrap-around */
 532        first = cpu;
 533
 534        /*
 535         * Now go through the entire mask until we find a valid
 536         * target.
 537         */
 538        do {
 539                /*
 540                 * We re-check online as the fallback case passes us
 541                 * an untested affinity mask
 542                 */
 543                if (cpu_online(cpu) && xive_try_pick_target(cpu))
 544                        return cpu;
 545                cpu = cpumask_next(cpu, mask);
 546                /* Wrap around */
 547                if (cpu >= nr_cpu_ids)
 548                        cpu = cpumask_first(mask);
 549        } while (cpu != first);
 550
 551        return -1;
 552}
 553
 554/*
 555 * Pick a target CPU for an interrupt. This is done at
 556 * startup or if the affinity is changed in a way that
 557 * invalidates the current target.
 558 */
 559static int xive_pick_irq_target(struct irq_data *d,
 560                                const struct cpumask *affinity)
 561{
 562        static unsigned int fuzz;
 563        struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
 564        cpumask_var_t mask;
 565        int cpu = -1;
 566
 567        /*
 568         * If we have chip IDs, first we try to build a mask of
  569         * CPUs on the same chip as the source and find a target in there
 570         */
 571        if (xd->src_chip != XIVE_INVALID_CHIP_ID &&
 572                zalloc_cpumask_var(&mask, GFP_ATOMIC)) {
 573                /* Build a mask of matching chip IDs */
 574                for_each_cpu_and(cpu, affinity, cpu_online_mask) {
 575                        struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
 576                        if (xc->chip_id == xd->src_chip)
 577                                cpumask_set_cpu(cpu, mask);
 578                }
 579                /* Try to find a target */
 580                if (cpumask_empty(mask))
 581                        cpu = -1;
 582                else
 583                        cpu = xive_find_target_in_mask(mask, fuzz++);
 584                free_cpumask_var(mask);
 585                if (cpu >= 0)
 586                        return cpu;
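                     /*
                      * No target found among the CPUs of the source chip:
                      * wind the fuzz back and fall back to the plain
                      * affinity mask below.
                      */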
 587                fuzz--;
 588        }
 589
  590        /* No chip IDs, fall back to using the affinity mask */
 591        return xive_find_target_in_mask(affinity, fuzz++);
 592}
 593
 594static unsigned int xive_irq_startup(struct irq_data *d)
 595{
 596        struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
 597        unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
 598        int target, rc;
 599
 600        xd->saved_p = false;
 601        xd->stale_p = false;
 602        pr_devel("xive_irq_startup: irq %d [0x%x] data @%p\n",
 603                 d->irq, hw_irq, d);
 604
 605#ifdef CONFIG_PCI_MSI
 606        /*
 607         * The generic MSI code returns with the interrupt disabled on the
 608         * card, using the MSI mask bits. Firmware doesn't appear to unmask
 609         * at that level, so we do it here by hand.
 610         */
 611        if (irq_data_get_msi_desc(d))
 612                pci_msi_unmask_irq(d);
 613#endif
 614
 615        /* Pick a target */
 616        target = xive_pick_irq_target(d, irq_data_get_affinity_mask(d));
 617        if (target == XIVE_INVALID_TARGET) {
 618                /* Try again breaking affinity */
 619                target = xive_pick_irq_target(d, cpu_online_mask);
 620                if (target == XIVE_INVALID_TARGET)
 621                        return -ENXIO;
 622                pr_warn("irq %d started with broken affinity\n", d->irq);
 623        }
 624
 625        /* Sanity check */
 626        if (WARN_ON(target == XIVE_INVALID_TARGET ||
 627                    target >= nr_cpu_ids))
 628                target = smp_processor_id();
 629
 630        xd->target = target;
 631
 632        /*
 633         * Configure the logical number to be the Linux IRQ number
 634         * and set the target queue
 635         */
 636        rc = xive_ops->configure_irq(hw_irq,
 637                                     get_hard_smp_processor_id(target),
 638                                     xive_irq_priority, d->irq);
 639        if (rc)
 640                return rc;
 641
 642        /* Unmask the ESB */
 643        xive_do_source_set_mask(xd, false);
 644
 645        return 0;
 646}
 647
 648/* called with irq descriptor lock held */
 649static void xive_irq_shutdown(struct irq_data *d)
 650{
 651        struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
 652        unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
 653
 654        pr_devel("xive_irq_shutdown: irq %d [0x%x] data @%p\n",
 655                 d->irq, hw_irq, d);
 656
 657        if (WARN_ON(xd->target == XIVE_INVALID_TARGET))
 658                return;
 659
 660        /* Mask the interrupt at the source */
 661        xive_do_source_set_mask(xd, true);
 662
 663        /*
 664         * Mask the interrupt in HW in the IVT/EAS and set the number
 665         * to be the "bad" IRQ number
 666         */
 667        xive_ops->configure_irq(hw_irq,
 668                                get_hard_smp_processor_id(xd->target),
 669                                0xff, XIVE_BAD_IRQ);
 670
 671        xive_dec_target_count(xd->target);
 672        xd->target = XIVE_INVALID_TARGET;
 673}
 674
 675static void xive_irq_unmask(struct irq_data *d)
 676{
 677        struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
 678
 679        pr_devel("xive_irq_unmask: irq %d data @%p\n", d->irq, xd);
 680
 681        /*
  682         * This is a workaround for PCI LSI problems on P9; for
  683         * these, we call the FW to unmask the interrupt. The
  684         * problems might be fixed by P9 DD2.0, in which case
  685         * firmware will no longer set that flag.
 686         */
 687        if (xd->flags & XIVE_IRQ_FLAG_MASK_FW) {
 688                unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
 689                xive_ops->configure_irq(hw_irq,
 690                                        get_hard_smp_processor_id(xd->target),
 691                                        xive_irq_priority, d->irq);
 692                return;
 693        }
 694
 695        xive_do_source_set_mask(xd, false);
 696}
 697
 698static void xive_irq_mask(struct irq_data *d)
 699{
 700        struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
 701
 702        pr_devel("xive_irq_mask: irq %d data @%p\n", d->irq, xd);
 703
 704        /*
  705         * This is a workaround for PCI LSI problems on P9; for
  706         * these, we call OPAL to set the mask. The problems might
  707         * be fixed by P9 DD2.0, in which case firmware will no
  708         * longer set that flag.
 709         */
 710        if (xd->flags & XIVE_IRQ_FLAG_MASK_FW) {
 711                unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
 712                xive_ops->configure_irq(hw_irq,
 713                                        get_hard_smp_processor_id(xd->target),
 714                                        0xff, d->irq);
 715                return;
 716        }
 717
 718        xive_do_source_set_mask(xd, true);
 719}
 720
 721static int xive_irq_set_affinity(struct irq_data *d,
 722                                 const struct cpumask *cpumask,
 723                                 bool force)
 724{
 725        struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
 726        unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
 727        u32 target, old_target;
 728        int rc = 0;
 729
 730        pr_devel("xive_irq_set_affinity: irq %d\n", d->irq);
 731
 732        /* Is this valid ? */
 733        if (cpumask_any_and(cpumask, cpu_online_mask) >= nr_cpu_ids)
 734                return -EINVAL;
 735
 736        /* Don't do anything if the interrupt isn't started */
 737        if (!irqd_is_started(d))
 738                return IRQ_SET_MASK_OK;
 739
 740        /*
  741         * If the existing target is already in the new mask and is
  742         * online, then do nothing.
 743         */
 744        if (xd->target != XIVE_INVALID_TARGET &&
 745            cpu_online(xd->target) &&
 746            cpumask_test_cpu(xd->target, cpumask))
 747                return IRQ_SET_MASK_OK;
 748
 749        /* Pick a new target */
 750        target = xive_pick_irq_target(d, cpumask);
 751
 752        /* No target found */
 753        if (target == XIVE_INVALID_TARGET)
 754                return -ENXIO;
 755
 756        /* Sanity check */
 757        if (WARN_ON(target >= nr_cpu_ids))
 758                target = smp_processor_id();
 759
 760        old_target = xd->target;
 761
 762        /*
 763         * Only configure the irq if it's not currently passed-through to
 764         * a KVM guest
 765         */
 766        if (!irqd_is_forwarded_to_vcpu(d))
 767                rc = xive_ops->configure_irq(hw_irq,
 768                                             get_hard_smp_processor_id(target),
 769                                             xive_irq_priority, d->irq);
 770        if (rc < 0) {
 771                pr_err("Error %d reconfiguring irq %d\n", rc, d->irq);
 772                return rc;
 773        }
 774
 775        pr_devel("  target: 0x%x\n", target);
 776        xd->target = target;
 777
 778        /* Give up previous target */
 779        if (old_target != XIVE_INVALID_TARGET)
  780                xive_dec_target_count(old_target);
 781
 782        return IRQ_SET_MASK_OK;
 783}
 784
 785static int xive_irq_set_type(struct irq_data *d, unsigned int flow_type)
 786{
 787        struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
 788
 789        /*
  790         * We only support these. This really has no effect other than setting
  791         * the corresponding descriptor bits, but those will in turn
  792         * affect the resend function when re-enabling an edge interrupt.
  793         *
  794         * Set the default to edge as explained in map().
 795         */
 796        if (flow_type == IRQ_TYPE_DEFAULT || flow_type == IRQ_TYPE_NONE)
 797                flow_type = IRQ_TYPE_EDGE_RISING;
 798
 799        if (flow_type != IRQ_TYPE_EDGE_RISING &&
 800            flow_type != IRQ_TYPE_LEVEL_LOW)
 801                return -EINVAL;
 802
 803        irqd_set_trigger_type(d, flow_type);
 804
 805        /*
 806         * Double check it matches what the FW thinks
 807         *
 808         * NOTE: We don't know yet if the PAPR interface will provide
 809         * the LSI vs MSI information apart from the device-tree so
 810         * this check might have to move into an optional backend call
 811         * that is specific to the native backend
 812         */
 813        if ((flow_type == IRQ_TYPE_LEVEL_LOW) !=
 814            !!(xd->flags & XIVE_IRQ_FLAG_LSI)) {
 815                pr_warn("Interrupt %d (HW 0x%x) type mismatch, Linux says %s, FW says %s\n",
 816                        d->irq, (u32)irqd_to_hwirq(d),
 817                        (flow_type == IRQ_TYPE_LEVEL_LOW) ? "Level" : "Edge",
 818                        (xd->flags & XIVE_IRQ_FLAG_LSI) ? "Level" : "Edge");
 819        }
 820
 821        return IRQ_SET_MASK_OK_NOCOPY;
 822}
 823
 824static int xive_irq_retrigger(struct irq_data *d)
 825{
 826        struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
 827
 828        /* This should be only for MSIs */
 829        if (WARN_ON(xd->flags & XIVE_IRQ_FLAG_LSI))
 830                return 0;
 831
 832        /*
 833         * To perform a retrigger, we first set the PQ bits to
 834         * 11, then perform an EOI.
 835         */
 836        xive_esb_read(xd, XIVE_ESB_SET_PQ_11);
 837
 838        /*
 839         * Note: We pass "0" to the hw_irq argument in order to
 840         * avoid calling into the backend EOI code which we don't
 841         * want to do in the case of a re-trigger. Backends typically
 842         * only do EOI for LSIs anyway.
 843         */
 844        xive_do_source_eoi(0, xd);
 845
 846        return 1;
 847}
 848
 849/*
 850 * Caller holds the irq descriptor lock, so this won't be called
 851 * concurrently with xive_get_irqchip_state on the same interrupt.
 852 */
 853static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
 854{
 855        struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
 856        unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
 857        int rc;
 858        u8 pq;
 859
 860        /*
 861         * We only support this on interrupts that do not require
 862         * firmware calls for masking and unmasking
 863         */
 864        if (xd->flags & XIVE_IRQ_FLAG_MASK_FW)
 865                return -EIO;
 866
 867        /*
 868         * This is called by KVM with state non-NULL for enabling
 869         * pass-through or NULL for disabling it
 870         */
 871        if (state) {
 872                irqd_set_forwarded_to_vcpu(d);
 873
 874                /* Set it to PQ=10 state to prevent further sends */
 875                pq = xive_esb_read(xd, XIVE_ESB_SET_PQ_10);
 876                if (!xd->stale_p) {
 877                        xd->saved_p = !!(pq & XIVE_ESB_VAL_P);
 878                        xd->stale_p = !xd->saved_p;
 879                }
 880
 881                /* No target ? nothing to do */
 882                if (xd->target == XIVE_INVALID_TARGET) {
 883                        /*
  884                         * An untargeted interrupt should have been
 885                         * also masked at the source
 886                         */
 887                        WARN_ON(xd->saved_p);
 888
 889                        return 0;
 890                }
 891
 892                /*
 893                 * If P was set, adjust state to PQ=11 to indicate
 894                 * that a resend is needed for the interrupt to reach
 895                 * the guest. Also remember the value of P.
 896                 *
 897                 * This also tells us that it's in flight to a host queue
 898                 * or has already been fetched but hasn't been EOIed yet
  899                 * by the host. Thus it's potentially using up a host
 900                 * queue slot. This is important to know because as long
 901                 * as this is the case, we must not hard-unmask it when
 902                 * "returning" that interrupt to the host.
 903                 *
 904                 * This saved_p is cleared by the host EOI, when we know
 905                 * for sure the queue slot is no longer in use.
 906                 */
 907                if (xd->saved_p) {
 908                        xive_esb_read(xd, XIVE_ESB_SET_PQ_11);
 909
 910                        /*
 911                         * Sync the XIVE source HW to ensure the interrupt
 912                         * has gone through the EAS before we change its
 913                         * target to the guest. That should guarantee us
 914                         * that we *will* eventually get an EOI for it on
  915                         * the host. Otherwise there would be a small window
  916                         * where P is seen here but the interrupt goes
  917                         * to the guest queue.
 918                         */
 919                        if (xive_ops->sync_source)
 920                                xive_ops->sync_source(hw_irq);
 921                }
 922        } else {
 923                irqd_clr_forwarded_to_vcpu(d);
 924
 925                /* No host target ? hard mask and return */
 926                if (xd->target == XIVE_INVALID_TARGET) {
 927                        xive_do_source_set_mask(xd, true);
 928                        return 0;
 929                }
 930
 931                /*
 932                 * Sync the XIVE source HW to ensure the interrupt
 933                 * has gone through the EAS before we change its
 934                 * target to the host.
 935                 */
 936                if (xive_ops->sync_source)
 937                        xive_ops->sync_source(hw_irq);
 938
 939                /*
 940                 * By convention we are called with the interrupt in
 941                 * a PQ=10 or PQ=11 state, ie, it won't fire and will
 942                 * have latched in Q whether there's a pending HW
 943                 * interrupt or not.
 944                 *
 945                 * First reconfigure the target.
 946                 */
 947                rc = xive_ops->configure_irq(hw_irq,
 948                                             get_hard_smp_processor_id(xd->target),
 949                                             xive_irq_priority, d->irq);
 950                if (rc)
 951                        return rc;
 952
 953                /*
 954                 * Then if saved_p is not set, effectively re-enable the
 955                 * interrupt with an EOI. If it is set, we know there is
 956                 * still a message in a host queue somewhere that will be
 957                 * EOId eventually.
 958                 *
 959                 * Note: We don't check irqd_irq_disabled(). Effectively,
 960                 * we *will* let the irq get through even if masked if the
 961                 * HW is still firing it in order to deal with the whole
 962                 * saved_p business properly. If the interrupt triggers
 963                 * while masked, the generic code will re-mask it anyway.
 964                 */
 965                if (!xd->saved_p)
 966                        xive_do_source_eoi(hw_irq, xd);
 967
 968        }
 969        return 0;
 970}
 971
 972/* Called with irq descriptor lock held. */
 973static int xive_get_irqchip_state(struct irq_data *data,
 974                                  enum irqchip_irq_state which, bool *state)
 975{
 976        struct xive_irq_data *xd = irq_data_get_irq_handler_data(data);
 977        u8 pq;
 978
 979        switch (which) {
 980        case IRQCHIP_STATE_ACTIVE:
 981                pq = xive_esb_read(xd, XIVE_ESB_GET);
 982
 983                /*
 984                 * The esb value being all 1's means we couldn't get
 985                 * the PQ state of the interrupt through mmio. It may
 986                 * happen, for example when querying a PHB interrupt
 987                 * while the PHB is in an error state. We consider the
 988                 * interrupt to be inactive in that case.
 989                 */
 990                *state = (pq != XIVE_ESB_INVALID) && !xd->stale_p &&
 991                        (xd->saved_p || !!(pq & XIVE_ESB_VAL_P));
 992                return 0;
 993        default:
 994                return -EINVAL;
 995        }
 996}
 997
 998static struct irq_chip xive_irq_chip = {
 999        .name = "XIVE-IRQ",
1000        .irq_startup = xive_irq_startup,
1001        .irq_shutdown = xive_irq_shutdown,
1002        .irq_eoi = xive_irq_eoi,
1003        .irq_mask = xive_irq_mask,
1004        .irq_unmask = xive_irq_unmask,
1005        .irq_set_affinity = xive_irq_set_affinity,
1006        .irq_set_type = xive_irq_set_type,
1007        .irq_retrigger = xive_irq_retrigger,
1008        .irq_set_vcpu_affinity = xive_irq_set_vcpu_affinity,
1009        .irq_get_irqchip_state = xive_get_irqchip_state,
1010};
1011
1012bool is_xive_irq(struct irq_chip *chip)
1013{
1014        return chip == &xive_irq_chip;
1015}
1016EXPORT_SYMBOL_GPL(is_xive_irq);
1017
1018void xive_cleanup_irq_data(struct xive_irq_data *xd)
1019{
1020        if (xd->eoi_mmio) {
1021                unmap_kernel_range((unsigned long)xd->eoi_mmio,
1022                                   1u << xd->esb_shift);
1023                iounmap(xd->eoi_mmio);
1024                if (xd->eoi_mmio == xd->trig_mmio)
1025                        xd->trig_mmio = NULL;
1026                xd->eoi_mmio = NULL;
1027        }
1028        if (xd->trig_mmio) {
1029                unmap_kernel_range((unsigned long)xd->trig_mmio,
1030                                   1u << xd->esb_shift);
1031                iounmap(xd->trig_mmio);
1032                xd->trig_mmio = NULL;
1033        }
1034}
1035EXPORT_SYMBOL_GPL(xive_cleanup_irq_data);
1036
1037static int xive_irq_alloc_data(unsigned int virq, irq_hw_number_t hw)
1038{
1039        struct xive_irq_data *xd;
1040        int rc;
1041
1042        xd = kzalloc(sizeof(struct xive_irq_data), GFP_KERNEL);
1043        if (!xd)
1044                return -ENOMEM;
1045        rc = xive_ops->populate_irq_data(hw, xd);
1046        if (rc) {
1047                kfree(xd);
1048                return rc;
1049        }
1050        xd->target = XIVE_INVALID_TARGET;
1051        irq_set_handler_data(virq, xd);
1052
1053        /*
 1054         * Turn OFF by default the interrupt being mapped. A side
 1055         * effect of this read is the mapping of the ESB page of the
1056         * interrupt in the Linux address space. This prevents page
1057         * fault issues in the crash handler which masks all
1058         * interrupts.
1059         */
1060        xive_esb_read(xd, XIVE_ESB_SET_PQ_01);
1061
1062        return 0;
1063}
1064
1065static void xive_irq_free_data(unsigned int virq)
1066{
1067        struct xive_irq_data *xd = irq_get_handler_data(virq);
1068
1069        if (!xd)
1070                return;
1071        irq_set_handler_data(virq, NULL);
1072        xive_cleanup_irq_data(xd);
1073        kfree(xd);
1074}
1075
1076#ifdef CONFIG_SMP
1077
1078static void xive_cause_ipi(int cpu)
1079{
1080        struct xive_cpu *xc;
1081        struct xive_irq_data *xd;
1082
1083        xc = per_cpu(xive_cpu, cpu);
1084
1085        DBG_VERBOSE("IPI CPU %d -> %d (HW IRQ 0x%x)\n",
1086                    smp_processor_id(), cpu, xc->hw_ipi);
1087
1088        xd = &xc->ipi_data;
1089        if (WARN_ON(!xd->trig_mmio))
1090                return;
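             /* A store to the trigger page fires the IPI at the target CPU */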
1091        out_be64(xd->trig_mmio, 0);
1092}
1093
1094static irqreturn_t xive_muxed_ipi_action(int irq, void *dev_id)
1095{
1096        return smp_ipi_demux();
1097}
1098
1099static void xive_ipi_eoi(struct irq_data *d)
1100{
1101        struct xive_cpu *xc = __this_cpu_read(xive_cpu);
1102
 1103        /* Handle possible race with unplug and drop stale IPIs */
 1104        if (!xc)
 1105                return;
 1106
 1107        DBG_VERBOSE("IPI eoi: irq=%d [0x%lx] (HW IRQ 0x%x) pending=%02x\n",
 1108                    d->irq, irqd_to_hwirq(d), xc->hw_ipi, xc->pending_prio);
1109        xive_do_source_eoi(xc->hw_ipi, &xc->ipi_data);
1110        xive_do_queue_eoi(xc);
1111}
1112
1113static void xive_ipi_do_nothing(struct irq_data *d)
1114{
1115        /*
1116         * Nothing to do, we never mask/unmask IPIs, but the callback
1117         * has to exist for the struct irq_chip.
1118         */
1119}
1120
1121static struct irq_chip xive_ipi_chip = {
1122        .name = "XIVE-IPI",
1123        .irq_eoi = xive_ipi_eoi,
1124        .irq_mask = xive_ipi_do_nothing,
1125        .irq_unmask = xive_ipi_do_nothing,
1126};
1127
1128static void __init xive_request_ipi(void)
1129{
1130        unsigned int virq;
1131
1132        /*
 1133         * If initialization failed, move on; we might manage to
 1134         * reach the point where we display our errors before
 1135         * the system falls apart.
1136         */
1137        if (!xive_irq_domain)
1138                return;
1139
1140        /* Initialize it */
1141        virq = irq_create_mapping(xive_irq_domain, 0);
1142        xive_ipi_irq = virq;
1143
1144        WARN_ON(request_irq(virq, xive_muxed_ipi_action,
1145                            IRQF_PERCPU | IRQF_NO_THREAD, "IPI", NULL));
1146}
1147
1148static int xive_setup_cpu_ipi(unsigned int cpu)
1149{
1150        struct xive_cpu *xc;
1151        int rc;
1152
1153        pr_debug("Setting up IPI for CPU %d\n", cpu);
1154
1155        xc = per_cpu(xive_cpu, cpu);
1156
1157        /* Check if we are already setup */
1158        if (xc->hw_ipi != XIVE_BAD_IRQ)
1159                return 0;
1160
 1161        /* Grab an IPI from the backend; this will populate xc->hw_ipi */
1162        if (xive_ops->get_ipi(cpu, xc))
1163                return -EIO;
1164
1165        /*
1166         * Populate the IRQ data in the xive_cpu structure and
1167         * configure the HW / enable the IPIs.
1168         */
1169        rc = xive_ops->populate_irq_data(xc->hw_ipi, &xc->ipi_data);
1170        if (rc) {
1171                pr_err("Failed to populate IPI data on CPU %d\n", cpu);
1172                return -EIO;
1173        }
1174        rc = xive_ops->configure_irq(xc->hw_ipi,
1175                                     get_hard_smp_processor_id(cpu),
1176                                     xive_irq_priority, xive_ipi_irq);
1177        if (rc) {
1178                pr_err("Failed to map IPI CPU %d\n", cpu);
1179                return -EIO;
1180        }
1181        pr_devel("CPU %d HW IPI %x, virq %d, trig_mmio=%p\n", cpu,
1182            xc->hw_ipi, xive_ipi_irq, xc->ipi_data.trig_mmio);
1183
1184        /* Unmask it */
1185        xive_do_source_set_mask(&xc->ipi_data, false);
1186
1187        return 0;
1188}
1189
1190static void xive_cleanup_cpu_ipi(unsigned int cpu, struct xive_cpu *xc)
1191{
1192        /* Disable the IPI and free the IRQ data */
1193
1194        /* Already cleaned up ? */
1195        if (xc->hw_ipi == XIVE_BAD_IRQ)
1196                return;
1197
1198        /* Mask the IPI */
1199        xive_do_source_set_mask(&xc->ipi_data, true);
1200
1201        /*
1202         * Note: We don't call xive_cleanup_irq_data() to free
1203         * the mappings as this is called from an IPI on kexec
1204         * which is not a safe environment to call iounmap()
1205         */
1206
1207        /* Deconfigure/mask in the backend */
1208        xive_ops->configure_irq(xc->hw_ipi, hard_smp_processor_id(),
1209                                0xff, xive_ipi_irq);
1210
1211        /* Free the IPIs in the backend */
1212        xive_ops->put_ipi(cpu, xc);
1213}
1214
1215void __init xive_smp_probe(void)
1216{
1217        smp_ops->cause_ipi = xive_cause_ipi;
1218
1219        /* Register the IPI */
1220        xive_request_ipi();
1221
1222        /* Allocate and setup IPI for the boot CPU */
1223        xive_setup_cpu_ipi(smp_processor_id());
1224}
1225
1226#endif /* CONFIG_SMP */
1227
1228static int xive_irq_domain_map(struct irq_domain *h, unsigned int virq,
1229                               irq_hw_number_t hw)
1230{
1231        int rc;
1232
1233        /*
1234         * Mark interrupts as edge sensitive by default so that resend
1235         * actually works. Will fix that up below if needed.
1236         */
1237        irq_clear_status_flags(virq, IRQ_LEVEL);
1238
1239#ifdef CONFIG_SMP
1240        /* IPIs are special and come up with HW number 0 */
1241        if (hw == 0) {
1242                /*
1243                 * IPIs are marked per-cpu. We use separate HW interrupts under
1244                 * the hood but associated with the same "linux" interrupt
1245                 */
1246                irq_set_chip_and_handler(virq, &xive_ipi_chip,
1247                                         handle_percpu_irq);
1248                return 0;
1249        }
1250#endif
1251
1252        rc = xive_irq_alloc_data(virq, hw);
1253        if (rc)
1254                return rc;
1255
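             /*
              * Normal sources use the fasteoi flow: the EOI is performed by
              * xive_irq_eoi() once the handler has run.
              */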
1256        irq_set_chip_and_handler(virq, &xive_irq_chip, handle_fasteoi_irq);
1257
1258        return 0;
1259}
1260
1261static void xive_irq_domain_unmap(struct irq_domain *d, unsigned int virq)
1262{
1263        struct irq_data *data = irq_get_irq_data(virq);
1264        unsigned int hw_irq;
1265
1266        /* XXX Assign BAD number */
1267        if (!data)
1268                return;
1269        hw_irq = (unsigned int)irqd_to_hwirq(data);
1270        if (hw_irq)
1271                xive_irq_free_data(virq);
1272}
1273
1274static int xive_irq_domain_xlate(struct irq_domain *h, struct device_node *ct,
1275                                 const u32 *intspec, unsigned int intsize,
1276                                 irq_hw_number_t *out_hwirq, unsigned int *out_flags)
1277
1278{
1279        *out_hwirq = intspec[0];
1280
1281        /*
 1282         * If intsize is at least 2, we look for the type in the second cell;
1283         * we assume the LSB indicates a level interrupt.
1284         */
1285        if (intsize > 1) {
1286                if (intspec[1] & 1)
1287                        *out_flags = IRQ_TYPE_LEVEL_LOW;
1288                else
1289                        *out_flags = IRQ_TYPE_EDGE_RISING;
1290        } else
1291                *out_flags = IRQ_TYPE_LEVEL_LOW;
1292
1293        return 0;
1294}
1295
1296static int xive_irq_domain_match(struct irq_domain *h, struct device_node *node,
1297                                 enum irq_domain_bus_token bus_token)
1298{
1299        return xive_ops->match(node);
1300}
1301
1302static const struct irq_domain_ops xive_irq_domain_ops = {
1303        .match = xive_irq_domain_match,
1304        .map = xive_irq_domain_map,
1305        .unmap = xive_irq_domain_unmap,
1306        .xlate = xive_irq_domain_xlate,
1307};
1308
1309static void __init xive_init_host(void)
1310{
1311        xive_irq_domain = irq_domain_add_nomap(NULL, XIVE_MAX_IRQ,
1312                                               &xive_irq_domain_ops, NULL);
1313        if (WARN_ON(xive_irq_domain == NULL))
1314                return;
1315        irq_set_default_host(xive_irq_domain);
1316}
1317
1318static void xive_cleanup_cpu_queues(unsigned int cpu, struct xive_cpu *xc)
1319{
1320        if (xc->queue[xive_irq_priority].qpage)
1321                xive_ops->cleanup_queue(cpu, xc, xive_irq_priority);
1322}
1323
1324static int xive_setup_cpu_queues(unsigned int cpu, struct xive_cpu *xc)
1325{
1326        int rc = 0;
1327
 1328        /* We set up a single queue for now, using a 64k page */
1329        if (!xc->queue[xive_irq_priority].qpage)
1330                rc = xive_ops->setup_queue(cpu, xc, xive_irq_priority);
1331
1332        return rc;
1333}
1334
1335static int xive_prepare_cpu(unsigned int cpu)
1336{
1337        struct xive_cpu *xc;
1338
1339        xc = per_cpu(xive_cpu, cpu);
1340        if (!xc) {
1341                struct device_node *np;
1342
1343                xc = kzalloc_node(sizeof(struct xive_cpu),
1344                                  GFP_KERNEL, cpu_to_node(cpu));
1345                if (!xc)
1346                        return -ENOMEM;
1347                np = of_get_cpu_node(cpu, NULL);
1348                if (np)
1349                        xc->chip_id = of_get_ibm_chip_id(np);
1350                of_node_put(np);
1351                xc->hw_ipi = XIVE_BAD_IRQ;
1352
1353                per_cpu(xive_cpu, cpu) = xc;
1354        }
1355
1356        /* Setup EQs if not already */
1357        return xive_setup_cpu_queues(cpu, xc);
1358}
1359
1360static void xive_setup_cpu(void)
1361{
1362        struct xive_cpu *xc = __this_cpu_read(xive_cpu);
1363
1364        /* The backend might have additional things to do */
1365        if (xive_ops->setup_cpu)
1366                xive_ops->setup_cpu(smp_processor_id(), xc);
1367
1368        /* Set CPPR to 0xff to enable flow of interrupts */
1369        xc->cppr = 0xff;
1370        out_8(xive_tima + xive_tima_offset + TM_CPPR, 0xff);
1371}
1372
1373#ifdef CONFIG_SMP
1374void xive_smp_setup_cpu(void)
1375{
1376        pr_devel("SMP setup CPU %d\n", smp_processor_id());
1377
1378        /* This will have already been done on the boot CPU */
1379        if (smp_processor_id() != boot_cpuid)
1380                xive_setup_cpu();
1381
1382}
1383
1384int xive_smp_prepare_cpu(unsigned int cpu)
1385{
1386        int rc;
1387
1388        /* Allocate per-CPU data and queues */
1389        rc = xive_prepare_cpu(cpu);
1390        if (rc)
1391                return rc;
1392
1393        /* Allocate and setup IPI for the new CPU */
1394        return xive_setup_cpu_ipi(cpu);
1395}
1396
1397#ifdef CONFIG_HOTPLUG_CPU
1398static void xive_flush_cpu_queue(unsigned int cpu, struct xive_cpu *xc)
1399{
1400        u32 irq;
1401
1402        /* We assume local irqs are disabled */
1403        WARN_ON(!irqs_disabled());
1404
1405        /* Check what's already in the CPU queue */
1406        while ((irq = xive_scan_interrupts(xc, false)) != 0) {
1407                /*
1408                 * We need to re-route that interrupt to its new destination.
1409                 * First get and lock the descriptor
1410                 */
1411                struct irq_desc *desc = irq_to_desc(irq);
1412                struct irq_data *d = irq_desc_get_irq_data(desc);
1413                struct xive_irq_data *xd;
1414                unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
1415
1416                /*
 1417                 * Ignore anything that isn't a XIVE irq, and also ignore
 1418                 * IPIs, which can simply be dropped.
1419                 */
1420                if (d->domain != xive_irq_domain || hw_irq == 0)
1421                        continue;
1422
1423                /*
 1424                 * The IRQ should have already been re-routed, so this is
 1425                 * just a stale entry in the old queue; re-trigger it to
 1426                 * make it reach its new destination.
1427                 */
1428#ifdef DEBUG_FLUSH
1429                pr_info("CPU %d: Got irq %d while offline, re-sending...\n",
1430                        cpu, irq);
1431#endif
1432                raw_spin_lock(&desc->lock);
1433                xd = irq_desc_get_handler_data(desc);
1434
1435                /*
1436                 * Clear saved_p to indicate that it's no longer pending
1437                 */
1438                xd->saved_p = false;
1439
1440                /*
 1441                 * For LSIs, we EOI; this will cause a resend if it's
1442                 * still asserted. Otherwise do an MSI retrigger.
1443                 */
1444                if (xd->flags & XIVE_IRQ_FLAG_LSI)
1445                        xive_do_source_eoi(irqd_to_hwirq(d), xd);
1446                else
1447                        xive_irq_retrigger(d);
1448
1449                raw_spin_unlock(&desc->lock);
1450        }
1451}
1452
1453void xive_smp_disable_cpu(void)
1454{
1455        struct xive_cpu *xc = __this_cpu_read(xive_cpu);
1456        unsigned int cpu = smp_processor_id();
1457
1458        /* Migrate interrupts away from the CPU */
1459        irq_migrate_all_off_this_cpu();
1460
1461        /* Set CPPR to 0 to disable flow of interrupts */
1462        xc->cppr = 0;
1463        out_8(xive_tima + xive_tima_offset + TM_CPPR, 0);
1464
1465        /* Flush everything still in the queue */
1466        xive_flush_cpu_queue(cpu, xc);
1467
1468        /* Re-enable CPPR  */
1469        xc->cppr = 0xff;
1470        out_8(xive_tima + xive_tima_offset + TM_CPPR, 0xff);
1471}
1472
1473void xive_flush_interrupt(void)
1474{
1475        struct xive_cpu *xc = __this_cpu_read(xive_cpu);
1476        unsigned int cpu = smp_processor_id();
1477
1478        /* Called if an interrupt occurs while the CPU is hot unplugged */
1479        xive_flush_cpu_queue(cpu, xc);
1480}
1481
1482#endif /* CONFIG_HOTPLUG_CPU */
1483
1484#endif /* CONFIG_SMP */
1485
1486void xive_teardown_cpu(void)
1487{
1488        struct xive_cpu *xc = __this_cpu_read(xive_cpu);
1489        unsigned int cpu = smp_processor_id();
1490
1491        /* Set CPPR to 0 to disable flow of interrupts */
1492        xc->cppr = 0;
1493        out_8(xive_tima + xive_tima_offset + TM_CPPR, 0);
1494
1495        if (xive_ops->teardown_cpu)
1496                xive_ops->teardown_cpu(cpu, xc);
1497
1498#ifdef CONFIG_SMP
1499        /* Get rid of IPI */
1500        xive_cleanup_cpu_ipi(cpu, xc);
1501#endif
1502
1503        /* Disable and free the queues */
1504        xive_cleanup_cpu_queues(cpu, xc);
1505}
1506
1507void xive_kexec_teardown_cpu(int secondary)
1508{
1509        struct xive_cpu *xc = __this_cpu_read(xive_cpu);
1510        unsigned int cpu = smp_processor_id();
1511
1512        /* Set CPPR to 0 to disable flow of interrupts */
1513        xc->cppr = 0;
1514        out_8(xive_tima + xive_tima_offset + TM_CPPR, 0);
1515
1516        /* Backend cleanup if any */
1517        if (xive_ops->teardown_cpu)
1518                xive_ops->teardown_cpu(cpu, xc);
1519
1520#ifdef CONFIG_SMP
1521        /* Get rid of IPI */
1522        xive_cleanup_cpu_ipi(cpu, xc);
1523#endif
1524
1525        /* Disable and free the queues */
1526        xive_cleanup_cpu_queues(cpu, xc);
1527}
1528
1529void xive_shutdown(void)
1530{
1531        xive_ops->shutdown();
1532}
1533
1534bool __init xive_core_init(const struct xive_ops *ops, void __iomem *area, u32 offset,
1535                           u8 max_prio)
1536{
1537        xive_tima = area;
1538        xive_tima_offset = offset;
1539        xive_ops = ops;
1540        xive_irq_priority = max_prio;
1541
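
             /*
              * Hook the platform interrupt fetch: the external interrupt
              * path calls ppc_md.get_irq(), i.e. xive_get_irq(), to obtain
              * the next pending IRQ.
              */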
1542        ppc_md.get_irq = xive_get_irq;
1543        __xive_enabled = true;
1544
1545        pr_devel("Initializing host..\n");
1546        xive_init_host();
1547
1548        pr_devel("Initializing boot CPU..\n");
1549
1550        /* Allocate per-CPU data and queues */
1551        xive_prepare_cpu(smp_processor_id());
1552
1553        /* Get ready for interrupts */
1554        xive_setup_cpu();
1555
1556        pr_info("Interrupt handling initialized with %s backend\n",
1557                xive_ops->name);
1558        pr_info("Using priority %d for all interrupts\n", max_prio);
1559
1560        return true;
1561}
1562
1563__be32 *xive_queue_page_alloc(unsigned int cpu, u32 queue_shift)
1564{
1565        unsigned int alloc_order;
1566        struct page *pages;
1567        __be32 *qpage;
1568
1569        alloc_order = xive_alloc_order(queue_shift);
1570        pages = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, alloc_order);
1571        if (!pages)
1572                return ERR_PTR(-ENOMEM);
1573        qpage = (__be32 *)page_address(pages);
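             /*
              * Zero the page: with the toggle/valid-bit scheme used by
              * xive_read_eq(), an all-zero queue reads as empty when the
              * toggle starts at 0.
              */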
1574        memset(qpage, 0, 1 << queue_shift);
1575
1576        return qpage;
1577}
1578
1579static int __init xive_off(char *arg)
1580{
1581        xive_cmdline_disabled = true;
1582        return 0;
1583}
1584__setup("xive=off", xive_off);
1585
1586void xive_debug_show_cpu(struct seq_file *m, int cpu)
1587{
1588        struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
1589
1590        seq_printf(m, "CPU %d:", cpu);
1591        if (xc) {
1592                seq_printf(m, "pp=%02x CPPR=%02x ", xc->pending_prio, xc->cppr);
1593
1594#ifdef CONFIG_SMP
1595                {
1596                        u64 val = xive_esb_read(&xc->ipi_data, XIVE_ESB_GET);
1597
1598                        seq_printf(m, "IPI=0x%08x PQ=%c%c ", xc->hw_ipi,
1599                                   val & XIVE_ESB_VAL_P ? 'P' : '-',
1600                                   val & XIVE_ESB_VAL_Q ? 'Q' : '-');
1601                }
1602#endif
1603                {
1604                        struct xive_q *q = &xc->queue[xive_irq_priority];
1605                        u32 i0, i1, idx;
1606
1607                        if (q->qpage) {
1608                                idx = q->idx;
1609                                i0 = be32_to_cpup(q->qpage + idx);
1610                                idx = (idx + 1) & q->msk;
1611                                i1 = be32_to_cpup(q->qpage + idx);
1612                                seq_printf(m, "EQ idx=%d T=%d %08x %08x ...",
1613                                           q->idx, q->toggle, i0, i1);
1614                        }
1615                }
1616        }
1617        seq_puts(m, "\n");
1618}
1619
1620void xive_debug_show_irq(struct seq_file *m, u32 hw_irq, struct irq_data *d)
1621{
1622        struct irq_chip *chip = irq_data_get_irq_chip(d);
1623        int rc;
1624        u32 target;
1625        u8 prio;
1626        u32 lirq;
1627
1628        if (!is_xive_irq(chip))
1629                return;
1630
1631        rc = xive_ops->get_irq_config(hw_irq, &target, &prio, &lirq);
1632        if (rc) {
1633                seq_printf(m, "IRQ 0x%08x : no config rc=%d\n", hw_irq, rc);
1634                return;
1635        }
1636
1637        seq_printf(m, "IRQ 0x%08x : target=0x%x prio=%02x lirq=0x%x ",
1638                   hw_irq, target, prio, lirq);
1639
1640        if (d) {
1641                struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
1642                u64 val = xive_esb_read(xd, XIVE_ESB_GET);
1643
1644                seq_printf(m, "flags=%c%c%c PQ=%c%c",
1645                           xd->flags & XIVE_IRQ_FLAG_STORE_EOI ? 'S' : ' ',
1646                           xd->flags & XIVE_IRQ_FLAG_LSI ? 'L' : ' ',
1647                           xd->flags & XIVE_IRQ_FLAG_H_INT_ESB ? 'H' : ' ',
1648                           val & XIVE_ESB_VAL_P ? 'P' : '-',
1649                           val & XIVE_ESB_VAL_Q ? 'Q' : '-');
1650        }
1651        seq_puts(m, "\n");
1652}
1653
1654static int xive_core_debug_show(struct seq_file *m, void *private)
1655{
1656        unsigned int i;
1657        struct irq_desc *desc;
1658        int cpu;
1659
1660        if (xive_ops->debug_show)
1661                xive_ops->debug_show(m, private);
1662
1663        for_each_possible_cpu(cpu)
1664                xive_debug_show_cpu(m, cpu);
1665
1666        for_each_irq_desc(i, desc) {
1667                struct irq_data *d = irq_desc_get_irq_data(desc);
1668                unsigned int hw_irq;
1669
1670                if (!d)
1671                        continue;
1672
1673                hw_irq = (unsigned int)irqd_to_hwirq(d);
1674
1675                /* IPIs are special (HW number 0) */
1676                if (hw_irq)
1677                        xive_debug_show_irq(m, hw_irq, d);
1678        }
1679        return 0;
1680}
1681DEFINE_SHOW_ATTRIBUTE(xive_core_debug);
1682
1683int xive_core_debug_init(void)
1684{
1685        if (xive_enabled())
1686                debugfs_create_file("xive", 0400, powerpc_debugfs_root,
1687                                    NULL, &xive_core_debug_fops);
1688        return 0;
1689}
1690