linux/drivers/xen/events/events_base.c
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Xen event channels
   4 *
   5 * Xen models interrupts with abstract event channels.  Because each
   6 * domain gets 1024 event channels, but NR_IRQS is not that large, we
   7 * must dynamically map irqs<->event channels.  The event channels
   8 * interface with the rest of the kernel by defining a xen interrupt
   9 * chip.  When an event is received, it is mapped to an irq and sent
  10 * through the normal interrupt processing path.
  11 *
  12 * There are four kinds of events which can be mapped to an event
  13 * channel:
  14 *
  15 * 1. Inter-domain notifications.  This includes all the virtual
  16 *    device events, since they're driven by front-ends in another domain
  17 *    (typically dom0).
  18 * 2. VIRQs, typically used for timers.  These are per-cpu events.
  19 * 3. IPIs.
  20 * 4. PIRQs - Hardware interrupts.
  21 *
  22 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
  23 */
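
/*
 * Editor's sketch (not part of the original file): a typical frontend
 * driver consumes this interface by binding an event channel to an IRQ
 * handler and tearing it down again on disconnect.  my_handler and
 * my_dev are illustrative names only.
 *
 *	static irqreturn_t my_handler(int irq, void *dev_id)
 *	{
 *		// process the event for my_dev
 *		return IRQ_HANDLED;
 *	}
 *
 *	int irq = bind_evtchn_to_irqhandler(evtchn, my_handler, 0,
 *					    "my-frontend", my_dev);
 *	if (irq < 0)
 *		return irq;
 *	...
 *	unbind_from_irqhandler(irq, my_dev);
 */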
  24
  25#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
  26
  27#include <linux/linkage.h>
  28#include <linux/interrupt.h>
  29#include <linux/irq.h>
  30#include <linux/moduleparam.h>
  31#include <linux/string.h>
  32#include <linux/memblock.h>
  33#include <linux/slab.h>
  34#include <linux/irqnr.h>
  35#include <linux/pci.h>
  36#include <linux/spinlock.h>
  37#include <linux/cpuhotplug.h>
  38#include <linux/atomic.h>
  39#include <linux/ktime.h>
  40
  41#ifdef CONFIG_X86
  42#include <asm/desc.h>
  43#include <asm/ptrace.h>
  44#include <asm/idtentry.h>
  45#include <asm/irq.h>
  46#include <asm/io_apic.h>
  47#include <asm/i8259.h>
  48#include <asm/xen/pci.h>
  49#endif
  50#include <asm/sync_bitops.h>
  51#include <asm/xen/hypercall.h>
  52#include <asm/xen/hypervisor.h>
  53#include <xen/page.h>
  54
  55#include <xen/xen.h>
  56#include <xen/hvm.h>
  57#include <xen/xen-ops.h>
  58#include <xen/events.h>
  59#include <xen/interface/xen.h>
  60#include <xen/interface/event_channel.h>
  61#include <xen/interface/hvm/hvm_op.h>
  62#include <xen/interface/hvm/params.h>
  63#include <xen/interface/physdev.h>
  64#include <xen/interface/sched.h>
  65#include <xen/interface/vcpu.h>
  66#include <xen/xenbus.h>
  67#include <asm/hw_irq.h>
  68
  69#include "events_internal.h"
  70
  71#undef MODULE_PARAM_PREFIX
  72#define MODULE_PARAM_PREFIX "xen."
  73
  74/* Interrupt types. */
  75enum xen_irq_type {
  76        IRQT_UNBOUND = 0,
  77        IRQT_PIRQ,
  78        IRQT_VIRQ,
  79        IRQT_IPI,
  80        IRQT_EVTCHN
  81};
  82
  83/*
  84 * Packed IRQ information:
  85 * type - enum xen_irq_type
  86 * event channel - irq->event channel mapping
  87 * cpu - cpu this event channel is bound to
  88 * index - type-specific information:
  89 *    PIRQ - vector, with MSB being "needs EOI", or physical IRQ of the HVM
  90 *           guest, or GSI (real passthrough IRQ) of the device.
  91 *    VIRQ - virq number
  92 *    IPI - IPI vector
  93 *    EVTCHN -
  94 */
  95struct irq_info {
  96        struct list_head list;
  97        struct list_head eoi_list;
  98        short refcnt;
  99        u8 spurious_cnt;
 100        u8 is_accounted;
 101        short type;             /* type: IRQT_* */
 102        u8 mask_reason;         /* Why is event channel masked */
 103#define EVT_MASK_REASON_EXPLICIT        0x01
 104#define EVT_MASK_REASON_TEMPORARY       0x02
 105#define EVT_MASK_REASON_EOI_PENDING     0x04
 106        u8 is_active;           /* Is event just being handled? */
 107        unsigned irq;
 108        evtchn_port_t evtchn;   /* event channel */
 109        unsigned short cpu;     /* cpu bound */
 110        unsigned short eoi_cpu; /* EOI must happen on this cpu-1 */
 111        unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */
 112        u64 eoi_time;           /* Time in jiffies when to EOI. */
 113        raw_spinlock_t lock;
 114
 115        union {
 116                unsigned short virq;
 117                enum ipi_vector ipi;
 118                struct {
 119                        unsigned short pirq;
 120                        unsigned short gsi;
 121                        unsigned char vector;
 122                        unsigned char flags;
 123                        uint16_t domid;
 124                } pirq;
 125                struct xenbus_device *interdomain;
 126        } u;
 127};
 128
 129#define PIRQ_NEEDS_EOI  (1 << 0)
 130#define PIRQ_SHAREABLE  (1 << 1)
 131#define PIRQ_MSI_GROUP  (1 << 2)
 132
 133static uint __read_mostly event_loop_timeout = 2;
 134module_param(event_loop_timeout, uint, 0644);
 135
 136static uint __read_mostly event_eoi_delay = 10;
 137module_param(event_eoi_delay, uint, 0644);
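
/*
 * Editor's note (illustrative): with MODULE_PARAM_PREFIX "xen." these
 * parameters can be set on the kernel command line, e.g.
 * xen.event_loop_timeout=4 xen.event_eoi_delay=20, and the 0644 mode
 * also makes them writable at runtime via sysfs.
 */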
 138
 139const struct evtchn_ops *evtchn_ops;
 140
 141/*
 142 * This lock protects updates to the following mapping and reference-count
 143 * arrays. The lock does not need to be acquired to read the mapping tables.
 144 */
 145static DEFINE_MUTEX(irq_mapping_update_lock);
 146
 147/*
 148 * Lock protecting the event handling loop against removing event channels.
 149 * Adding event channels is not an issue, as the associated IRQ becomes active
 150 * only after everything is set up (the handler can't be entered for an event
 151 * before request_[threaded_]irq(), since the event channel is unmasked only
 152 * then).
 153 */
 154static DEFINE_RWLOCK(evtchn_rwlock);
 155
 156/*
 157 * Lock hierarchy:
 158 *
 159 * irq_mapping_update_lock
 160 *   evtchn_rwlock
 161 *     IRQ-desc lock
 162 *       percpu eoi_list_lock
 163 *         irq_info->lock
 164 */
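
/*
 * Editor's note (illustrative, not in the original file): the teardown
 * path shows the upper levels of this hierarchy being taken in order:
 * unbind_from_irq() acquires irq_mapping_update_lock and the nested
 * xen_free_irq() then takes evtchn_rwlock for writing.
 */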
 165
 166static LIST_HEAD(xen_irq_list_head);
 167
 168/* IRQ <-> VIRQ mapping. */
 169static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1};
 170
 171/* IRQ <-> IPI mapping */
 172static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1};
 173
 174/* Event channel distribution data */
 175static atomic_t channels_on_cpu[NR_CPUS];
 176
 177static int **evtchn_to_irq;
 178#ifdef CONFIG_X86
 179static unsigned long *pirq_eoi_map;
 180#endif
 181static bool (*pirq_needs_eoi)(unsigned irq);
 182
 183#define EVTCHN_ROW(e)  (e / (PAGE_SIZE/sizeof(**evtchn_to_irq)))
 184#define EVTCHN_COL(e)  (e % (PAGE_SIZE/sizeof(**evtchn_to_irq)))
 185#define EVTCHN_PER_ROW (PAGE_SIZE / sizeof(**evtchn_to_irq))
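
/*
 * Editor's example (assuming 4 KiB pages and 4-byte ints): each row of
 * evtchn_to_irq holds PAGE_SIZE / sizeof(int) = 1024 entries, so event
 * channel 1500 maps to row 1, column 476.
 */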
 186
 187/* Xen will never allocate port zero for any purpose. */
 188#define VALID_EVTCHN(chn)       ((chn) != 0)
 189
 190static struct irq_info *legacy_info_ptrs[NR_IRQS_LEGACY];
 191
 192static struct irq_chip xen_dynamic_chip;
 193static struct irq_chip xen_lateeoi_chip;
 194static struct irq_chip xen_percpu_chip;
 195static struct irq_chip xen_pirq_chip;
 196static void enable_dynirq(struct irq_data *data);
 197static void disable_dynirq(struct irq_data *data);
 198
 199static DEFINE_PER_CPU(unsigned int, irq_epoch);
 200
 201static void clear_evtchn_to_irq_row(int *evtchn_row)
 202{
 203        unsigned col;
 204
 205        for (col = 0; col < EVTCHN_PER_ROW; col++)
 206                WRITE_ONCE(evtchn_row[col], -1);
 207}
 208
 209static void clear_evtchn_to_irq_all(void)
 210{
 211        unsigned row;
 212
 213        for (row = 0; row < EVTCHN_ROW(xen_evtchn_max_channels()); row++) {
 214                if (evtchn_to_irq[row] == NULL)
 215                        continue;
 216                clear_evtchn_to_irq_row(evtchn_to_irq[row]);
 217        }
 218}
 219
 220static int set_evtchn_to_irq(evtchn_port_t evtchn, unsigned int irq)
 221{
 222        unsigned row;
 223        unsigned col;
 224        int *evtchn_row;
 225
 226        if (evtchn >= xen_evtchn_max_channels())
 227                return -EINVAL;
 228
 229        row = EVTCHN_ROW(evtchn);
 230        col = EVTCHN_COL(evtchn);
 231
 232        if (evtchn_to_irq[row] == NULL) {
 233                /* Unallocated irq entries return -1 anyway */
 234                if (irq == -1)
 235                        return 0;
 236
 237                evtchn_row = (int *) __get_free_pages(GFP_KERNEL, 0);
 238                if (evtchn_row == NULL)
 239                        return -ENOMEM;
 240
 241                clear_evtchn_to_irq_row(evtchn_row);
 242
 243                /*
 244                 * We've prepared an empty row for the mapping. If a different
 245                 * thread was faster inserting it, we can drop ours.
 246                 */
 247                if (cmpxchg(&evtchn_to_irq[row], NULL, evtchn_row) != NULL)
 248                        free_page((unsigned long) evtchn_row);
 249        }
 250
 251        WRITE_ONCE(evtchn_to_irq[row][col], irq);
 252        return 0;
 253}
 254
 255int get_evtchn_to_irq(evtchn_port_t evtchn)
 256{
 257        if (evtchn >= xen_evtchn_max_channels())
 258                return -1;
 259        if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL)
 260                return -1;
 261        return READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]);
 262}
 263
 264/* Get info for IRQ */
 265static struct irq_info *info_for_irq(unsigned irq)
 266{
 267        if (irq < nr_legacy_irqs())
 268                return legacy_info_ptrs[irq];
 269        else
 270                return irq_get_chip_data(irq);
 271}
 272
 273static void set_info_for_irq(unsigned int irq, struct irq_info *info)
 274{
 275        if (irq < nr_legacy_irqs())
 276                legacy_info_ptrs[irq] = info;
 277        else
 278                irq_set_chip_data(irq, info);
 279}
 280
 281/* Per CPU channel accounting */
 282static void channels_on_cpu_dec(struct irq_info *info)
 283{
 284        if (!info->is_accounted)
 285                return;
 286
 287        info->is_accounted = 0;
 288
 289        if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids))
 290                return;
 291
 292        WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], -1 , 0));
 293}
 294
 295static void channels_on_cpu_inc(struct irq_info *info)
 296{
 297        if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids))
 298                return;
 299
 300        if (WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], 1,
 301                                            INT_MAX)))
 302                return;
 303
 304        info->is_accounted = 1;
 305}
 306
 307/* Constructors for packed IRQ information. */
 308static int xen_irq_info_common_setup(struct irq_info *info,
 309                                     unsigned irq,
 310                                     enum xen_irq_type type,
 311                                     evtchn_port_t evtchn,
 312                                     unsigned short cpu)
 313{
 314        int ret;
 315
 316        BUG_ON(info->type != IRQT_UNBOUND && info->type != type);
 317
 318        info->type = type;
 319        info->irq = irq;
 320        info->evtchn = evtchn;
 321        info->cpu = cpu;
 322        info->mask_reason = EVT_MASK_REASON_EXPLICIT;
 323        raw_spin_lock_init(&info->lock);
 324
 325        ret = set_evtchn_to_irq(evtchn, irq);
 326        if (ret < 0)
 327                return ret;
 328
 329        irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN);
 330
 331        return xen_evtchn_port_setup(evtchn);
 332}
 333
 334static int xen_irq_info_evtchn_setup(unsigned irq,
 335                                     evtchn_port_t evtchn,
 336                                     struct xenbus_device *dev)
 337{
 338        struct irq_info *info = info_for_irq(irq);
 339        int ret;
 340
 341        ret = xen_irq_info_common_setup(info, irq, IRQT_EVTCHN, evtchn, 0);
 342        info->u.interdomain = dev;
 343        if (dev)
 344                atomic_inc(&dev->event_channels);
 345
 346        return ret;
 347}
 348
 349static int xen_irq_info_ipi_setup(unsigned cpu,
 350                                  unsigned irq,
 351                                  evtchn_port_t evtchn,
 352                                  enum ipi_vector ipi)
 353{
 354        struct irq_info *info = info_for_irq(irq);
 355
 356        info->u.ipi = ipi;
 357
 358        per_cpu(ipi_to_irq, cpu)[ipi] = irq;
 359
 360        return xen_irq_info_common_setup(info, irq, IRQT_IPI, evtchn, 0);
 361}
 362
 363static int xen_irq_info_virq_setup(unsigned cpu,
 364                                   unsigned irq,
 365                                   evtchn_port_t evtchn,
 366                                   unsigned virq)
 367{
 368        struct irq_info *info = info_for_irq(irq);
 369
 370        info->u.virq = virq;
 371
 372        per_cpu(virq_to_irq, cpu)[virq] = irq;
 373
 374        return xen_irq_info_common_setup(info, irq, IRQT_VIRQ, evtchn, 0);
 375}
 376
 377static int xen_irq_info_pirq_setup(unsigned irq,
 378                                   evtchn_port_t evtchn,
 379                                   unsigned pirq,
 380                                   unsigned gsi,
 381                                   uint16_t domid,
 382                                   unsigned char flags)
 383{
 384        struct irq_info *info = info_for_irq(irq);
 385
 386        info->u.pirq.pirq = pirq;
 387        info->u.pirq.gsi = gsi;
 388        info->u.pirq.domid = domid;
 389        info->u.pirq.flags = flags;
 390
 391        return xen_irq_info_common_setup(info, irq, IRQT_PIRQ, evtchn, 0);
 392}
 393
 394static void xen_irq_info_cleanup(struct irq_info *info)
 395{
 396        set_evtchn_to_irq(info->evtchn, -1);
 397        xen_evtchn_port_remove(info->evtchn, info->cpu);
 398        info->evtchn = 0;
 399        channels_on_cpu_dec(info);
 400}
 401
 402/*
 403 * Accessors for packed IRQ information.
 404 */
 405evtchn_port_t evtchn_from_irq(unsigned irq)
 406{
 407        const struct irq_info *info = NULL;
 408
 409        if (likely(irq < nr_irqs))
 410                info = info_for_irq(irq);
 411        if (!info)
 412                return 0;
 413
 414        return info->evtchn;
 415}
 416
 417unsigned int irq_from_evtchn(evtchn_port_t evtchn)
 418{
 419        return get_evtchn_to_irq(evtchn);
 420}
 421EXPORT_SYMBOL_GPL(irq_from_evtchn);
 422
 423int irq_from_virq(unsigned int cpu, unsigned int virq)
 424{
 425        return per_cpu(virq_to_irq, cpu)[virq];
 426}
 427
 428static enum ipi_vector ipi_from_irq(unsigned irq)
 429{
 430        struct irq_info *info = info_for_irq(irq);
 431
 432        BUG_ON(info == NULL);
 433        BUG_ON(info->type != IRQT_IPI);
 434
 435        return info->u.ipi;
 436}
 437
 438static unsigned virq_from_irq(unsigned irq)
 439{
 440        struct irq_info *info = info_for_irq(irq);
 441
 442        BUG_ON(info == NULL);
 443        BUG_ON(info->type != IRQT_VIRQ);
 444
 445        return info->u.virq;
 446}
 447
 448static unsigned pirq_from_irq(unsigned irq)
 449{
 450        struct irq_info *info = info_for_irq(irq);
 451
 452        BUG_ON(info == NULL);
 453        BUG_ON(info->type != IRQT_PIRQ);
 454
 455        return info->u.pirq.pirq;
 456}
 457
 458static enum xen_irq_type type_from_irq(unsigned irq)
 459{
 460        return info_for_irq(irq)->type;
 461}
 462
 463static unsigned cpu_from_irq(unsigned irq)
 464{
 465        return info_for_irq(irq)->cpu;
 466}
 467
 468unsigned int cpu_from_evtchn(evtchn_port_t evtchn)
 469{
 470        int irq = get_evtchn_to_irq(evtchn);
 471        unsigned ret = 0;
 472
 473        if (irq != -1)
 474                ret = cpu_from_irq(irq);
 475
 476        return ret;
 477}
 478
 479static void do_mask(struct irq_info *info, u8 reason)
 480{
 481        unsigned long flags;
 482
 483        raw_spin_lock_irqsave(&info->lock, flags);
 484
 485        if (!info->mask_reason)
 486                mask_evtchn(info->evtchn);
 487
 488        info->mask_reason |= reason;
 489
 490        raw_spin_unlock_irqrestore(&info->lock, flags);
 491}
 492
 493static void do_unmask(struct irq_info *info, u8 reason)
 494{
 495        unsigned long flags;
 496
 497        raw_spin_lock_irqsave(&info->lock, flags);
 498
 499        info->mask_reason &= ~reason;
 500
 501        if (!info->mask_reason)
 502                unmask_evtchn(info->evtchn);
 503
 504        raw_spin_unlock_irqrestore(&info->lock, flags);
 505}
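
/*
 * Editor's note: mask_reason acts as a bitmask of independent "keep
 * masked" requests; the event channel is unmasked again only once the
 * last reason bit has been cleared.
 */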
 506
 507#ifdef CONFIG_X86
 508static bool pirq_check_eoi_map(unsigned irq)
 509{
 510        return test_bit(pirq_from_irq(irq), pirq_eoi_map);
 511}
 512#endif
 513
 514static bool pirq_needs_eoi_flag(unsigned irq)
 515{
 516        struct irq_info *info = info_for_irq(irq);
 517        BUG_ON(info->type != IRQT_PIRQ);
 518
 519        return info->u.pirq.flags & PIRQ_NEEDS_EOI;
 520}
 521
 522static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu,
 523                               bool force_affinity)
 524{
 525        int irq = get_evtchn_to_irq(evtchn);
 526        struct irq_info *info = info_for_irq(irq);
 527
 528        BUG_ON(irq == -1);
 529
 530        if (IS_ENABLED(CONFIG_SMP) && force_affinity) {
 531                cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(cpu));
 532                cpumask_copy(irq_get_effective_affinity_mask(irq),
 533                             cpumask_of(cpu));
 534        }
 535
 536        xen_evtchn_port_bind_to_cpu(evtchn, cpu, info->cpu);
 537
 538        channels_on_cpu_dec(info);
 539        info->cpu = cpu;
 540        channels_on_cpu_inc(info);
 541}
 542
 543/**
 544 * notify_remote_via_irq - send event to remote end of event channel via irq
 545 * @irq: irq of event channel to send event to
 546 *
 547 * Unlike notify_remote_via_evtchn(), this is safe to use across
 548 * save/restore. Notifications on a broken connection are silently
 549 * dropped.
 550 */
 551void notify_remote_via_irq(int irq)
 552{
 553        evtchn_port_t evtchn = evtchn_from_irq(irq);
 554
 555        if (VALID_EVTCHN(evtchn))
 556                notify_remote_via_evtchn(evtchn);
 557}
 558EXPORT_SYMBOL_GPL(notify_remote_via_irq);
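
/*
 * Editor's sketch (illustrative only): a frontend would typically kick
 * its backend after queuing requests on the shared ring, e.g.
 *
 *	if (notify)			// hypothetical check from the ring macros
 *		notify_remote_via_irq(irq);
 */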
 559
 560struct lateeoi_work {
 561        struct delayed_work delayed;
 562        spinlock_t eoi_list_lock;
 563        struct list_head eoi_list;
 564};
 565
 566static DEFINE_PER_CPU(struct lateeoi_work, lateeoi);
 567
 568static void lateeoi_list_del(struct irq_info *info)
 569{
 570        struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
 571        unsigned long flags;
 572
 573        spin_lock_irqsave(&eoi->eoi_list_lock, flags);
 574        list_del_init(&info->eoi_list);
 575        spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
 576}
 577
 578static void lateeoi_list_add(struct irq_info *info)
 579{
 580        struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
 581        struct irq_info *elem;
 582        u64 now = get_jiffies_64();
 583        unsigned long delay;
 584        unsigned long flags;
 585
 586        if (now < info->eoi_time)
 587                delay = info->eoi_time - now;
 588        else
 589                delay = 1;
 590
 591        spin_lock_irqsave(&eoi->eoi_list_lock, flags);
 592
 593        if (list_empty(&eoi->eoi_list)) {
 594                list_add(&info->eoi_list, &eoi->eoi_list);
 595                mod_delayed_work_on(info->eoi_cpu, system_wq,
 596                                    &eoi->delayed, delay);
 597        } else {
 598                list_for_each_entry_reverse(elem, &eoi->eoi_list, eoi_list) {
 599                        if (elem->eoi_time <= info->eoi_time)
 600                                break;
 601                }
 602                list_add(&info->eoi_list, &elem->eoi_list);
 603        }
 604
 605        spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
 606}
 607
 608static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
 609{
 610        evtchn_port_t evtchn;
 611        unsigned int cpu;
 612        unsigned int delay = 0;
 613
 614        evtchn = info->evtchn;
 615        if (!VALID_EVTCHN(evtchn) || !list_empty(&info->eoi_list))
 616                return;
 617
 618        if (spurious) {
 619                struct xenbus_device *dev = info->u.interdomain;
 620                unsigned int threshold = 1;
 621
 622                if (dev && dev->spurious_threshold)
 623                        threshold = dev->spurious_threshold;
 624
 625                if ((1 << info->spurious_cnt) < (HZ << 2)) {
 626                        if (info->spurious_cnt != 0xFF)
 627                                info->spurious_cnt++;
 628                }
 629                if (info->spurious_cnt > threshold) {
 630                        delay = 1 << (info->spurious_cnt - 1 - threshold);
 631                        if (delay > HZ)
 632                                delay = HZ;
 633                        if (!info->eoi_time)
 634                                info->eoi_cpu = smp_processor_id();
 635                        info->eoi_time = get_jiffies_64() + delay;
 636                        if (dev)
 637                                atomic_add(delay, &dev->jiffies_eoi_delayed);
 638                }
 639                if (dev)
 640                        atomic_inc(&dev->spurious_events);
 641        } else {
 642                info->spurious_cnt = 0;
 643        }
 644
 645        cpu = info->eoi_cpu;
 646        if (info->eoi_time &&
 647            (info->irq_epoch == per_cpu(irq_epoch, cpu) || delay)) {
 648                lateeoi_list_add(info);
 649                return;
 650        }
 651
 652        info->eoi_time = 0;
 653
 654        /* is_active hasn't been reset yet, do it now. */
 655        smp_store_release(&info->is_active, 0);
 656        do_unmask(info, EVT_MASK_REASON_EOI_PENDING);
 657}
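
/*
 * Editor's example: with the default threshold of 1, the second
 * consecutive spurious event already delays the EOI by one jiffy
 * (1 << (2 - 1 - 1)), and the delay then doubles with every further
 * spurious event until it is capped at HZ.
 */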
 658
 659static void xen_irq_lateeoi_worker(struct work_struct *work)
 660{
 661        struct lateeoi_work *eoi;
 662        struct irq_info *info;
 663        u64 now = get_jiffies_64();
 664        unsigned long flags;
 665
 666        eoi = container_of(to_delayed_work(work), struct lateeoi_work, delayed);
 667
 668        read_lock_irqsave(&evtchn_rwlock, flags);
 669
 670        while (true) {
 671                spin_lock(&eoi->eoi_list_lock);
 672
 673                info = list_first_entry_or_null(&eoi->eoi_list, struct irq_info,
 674                                                eoi_list);
 675
 676                if (info == NULL || now < info->eoi_time) {
 677                        spin_unlock(&eoi->eoi_list_lock);
 678                        break;
 679                }
 680
 681                list_del_init(&info->eoi_list);
 682
 683                spin_unlock(&eoi->eoi_list_lock);
 684
 685                info->eoi_time = 0;
 686
 687                xen_irq_lateeoi_locked(info, false);
 688        }
 689
 690        if (info)
 691                mod_delayed_work_on(info->eoi_cpu, system_wq,
 692                                    &eoi->delayed, info->eoi_time - now);
 693
 694        read_unlock_irqrestore(&evtchn_rwlock, flags);
 695}
 696
 697static void xen_cpu_init_eoi(unsigned int cpu)
 698{
 699        struct lateeoi_work *eoi = &per_cpu(lateeoi, cpu);
 700
 701        INIT_DELAYED_WORK(&eoi->delayed, xen_irq_lateeoi_worker);
 702        spin_lock_init(&eoi->eoi_list_lock);
 703        INIT_LIST_HEAD(&eoi->eoi_list);
 704}
 705
 706void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags)
 707{
 708        struct irq_info *info;
 709        unsigned long flags;
 710
 711        read_lock_irqsave(&evtchn_rwlock, flags);
 712
 713        info = info_for_irq(irq);
 714
 715        if (info)
 716                xen_irq_lateeoi_locked(info, eoi_flags & XEN_EOI_FLAG_SPURIOUS);
 717
 718        read_unlock_irqrestore(&evtchn_rwlock, flags);
 719}
 720EXPORT_SYMBOL_GPL(xen_irq_lateeoi);
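
/*
 * Editor's sketch (illustrative only): a backend using the lateeoi
 * model binds with bind_evtchn_to_irqhandler_lateeoi() and signals
 * completion from its handler, flagging events that produced no work;
 * process_ring() is a hypothetical helper.
 *
 *	static irqreturn_t my_backend_handler(int irq, void *dev_id)
 *	{
 *		bool did_work = process_ring(dev_id);
 *
 *		xen_irq_lateeoi(irq, did_work ? 0 : XEN_EOI_FLAG_SPURIOUS);
 *		return IRQ_HANDLED;
 *	}
 */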
 721
 722static void xen_irq_init(unsigned irq)
 723{
 724        struct irq_info *info;
 725
 726        info = kzalloc(sizeof(*info), GFP_KERNEL);
 727        if (info == NULL)
 728                panic("Unable to allocate metadata for IRQ%d\n", irq);
 729
 730        info->type = IRQT_UNBOUND;
 731        info->refcnt = -1;
 732
 733        set_info_for_irq(irq, info);
 734        /*
 735         * Interrupt affinity setting can be immediate. No point
 736         * in delaying it until an interrupt is handled.
 737         */
 738        irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
 739
 740        INIT_LIST_HEAD(&info->eoi_list);
 741        list_add_tail(&info->list, &xen_irq_list_head);
 742}
 743
 744static int __must_check xen_allocate_irqs_dynamic(int nvec)
 745{
 746        int i, irq = irq_alloc_descs(-1, 0, nvec, -1);
 747
 748        if (irq >= 0) {
 749                for (i = 0; i < nvec; i++)
 750                        xen_irq_init(irq + i);
 751        }
 752
 753        return irq;
 754}
 755
 756static inline int __must_check xen_allocate_irq_dynamic(void)
 757{
 758
 759        return xen_allocate_irqs_dynamic(1);
 760}
 761
 762static int __must_check xen_allocate_irq_gsi(unsigned gsi)
 763{
 764        int irq;
 765
 766        /*
 767         * A PV guest has no concept of a GSI (since it has no ACPI
 768         * nor access to/knowledge of the physical APICs). Therefore
 769         * all IRQs are dynamically allocated from the entire IRQ
 770         * space.
 771         */
 772        if (xen_pv_domain() && !xen_initial_domain())
 773                return xen_allocate_irq_dynamic();
 774
 775        /* Legacy IRQ descriptors are already allocated by the arch. */
 776        if (gsi < nr_legacy_irqs())
 777                irq = gsi;
 778        else
 779                irq = irq_alloc_desc_at(gsi, -1);
 780
 781        xen_irq_init(irq);
 782
 783        return irq;
 784}
 785
 786static void xen_free_irq(unsigned irq)
 787{
 788        struct irq_info *info = info_for_irq(irq);
 789        unsigned long flags;
 790
 791        if (WARN_ON(!info))
 792                return;
 793
 794        write_lock_irqsave(&evtchn_rwlock, flags);
 795
 796        if (!list_empty(&info->eoi_list))
 797                lateeoi_list_del(info);
 798
 799        list_del(&info->list);
 800
 801        set_info_for_irq(irq, NULL);
 802
 803        WARN_ON(info->refcnt > 0);
 804
 805        write_unlock_irqrestore(&evtchn_rwlock, flags);
 806
 807        kfree(info);
 808
 809        /* Legacy IRQ descriptors are managed by the arch. */
 810        if (irq < nr_legacy_irqs())
 811                return;
 812
 813        irq_free_desc(irq);
 814}
 815
 816static void xen_evtchn_close(evtchn_port_t port)
 817{
 818        struct evtchn_close close;
 819
 820        close.port = port;
 821        if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
 822                BUG();
 823}
 824
 825/* Not called for lateeoi events. */
 826static void event_handler_exit(struct irq_info *info)
 827{
 828        smp_store_release(&info->is_active, 0);
 829        clear_evtchn(info->evtchn);
 830}
 831
 832static void pirq_query_unmask(int irq)
 833{
 834        struct physdev_irq_status_query irq_status;
 835        struct irq_info *info = info_for_irq(irq);
 836
 837        BUG_ON(info->type != IRQT_PIRQ);
 838
 839        irq_status.irq = pirq_from_irq(irq);
 840        if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
 841                irq_status.flags = 0;
 842
 843        info->u.pirq.flags &= ~PIRQ_NEEDS_EOI;
 844        if (irq_status.flags & XENIRQSTAT_needs_eoi)
 845                info->u.pirq.flags |= PIRQ_NEEDS_EOI;
 846}
 847
 848static void eoi_pirq(struct irq_data *data)
 849{
 850        struct irq_info *info = info_for_irq(data->irq);
 851        evtchn_port_t evtchn = info ? info->evtchn : 0;
 852        struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) };
 853        int rc = 0;
 854
 855        if (!VALID_EVTCHN(evtchn))
 856                return;
 857
 858        event_handler_exit(info);
 859
 860        if (pirq_needs_eoi(data->irq)) {
 861                rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
 862                WARN_ON(rc);
 863        }
 864}
 865
 866static void mask_ack_pirq(struct irq_data *data)
 867{
 868        disable_dynirq(data);
 869        eoi_pirq(data);
 870}
 871
 872static unsigned int __startup_pirq(unsigned int irq)
 873{
 874        struct evtchn_bind_pirq bind_pirq;
 875        struct irq_info *info = info_for_irq(irq);
 876        evtchn_port_t evtchn = evtchn_from_irq(irq);
 877        int rc;
 878
 879        BUG_ON(info->type != IRQT_PIRQ);
 880
 881        if (VALID_EVTCHN(evtchn))
 882                goto out;
 883
 884        bind_pirq.pirq = pirq_from_irq(irq);
 885        /* NB. We are happy to share unless we are probing. */
 886        bind_pirq.flags = info->u.pirq.flags & PIRQ_SHAREABLE ?
 887                                        BIND_PIRQ__WILL_SHARE : 0;
 888        rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq);
 889        if (rc != 0) {
 890                pr_warn("Failed to obtain physical IRQ %d\n", irq);
 891                return 0;
 892        }
 893        evtchn = bind_pirq.port;
 894
 895        pirq_query_unmask(irq);
 896
 897        rc = set_evtchn_to_irq(evtchn, irq);
 898        if (rc)
 899                goto err;
 900
 901        info->evtchn = evtchn;
 902        bind_evtchn_to_cpu(evtchn, 0, false);
 903
 904        rc = xen_evtchn_port_setup(evtchn);
 905        if (rc)
 906                goto err;
 907
 908out:
 909        do_unmask(info, EVT_MASK_REASON_EXPLICIT);
 910
 911        eoi_pirq(irq_get_irq_data(irq));
 912
 913        return 0;
 914
 915err:
 916        pr_err("irq%d: Failed to set port to irq mapping (%d)\n", irq, rc);
 917        xen_evtchn_close(evtchn);
 918        return 0;
 919}
 920
 921static unsigned int startup_pirq(struct irq_data *data)
 922{
 923        return __startup_pirq(data->irq);
 924}
 925
 926static void shutdown_pirq(struct irq_data *data)
 927{
 928        unsigned int irq = data->irq;
 929        struct irq_info *info = info_for_irq(irq);
 930        evtchn_port_t evtchn = evtchn_from_irq(irq);
 931
 932        BUG_ON(info->type != IRQT_PIRQ);
 933
 934        if (!VALID_EVTCHN(evtchn))
 935                return;
 936
 937        do_mask(info, EVT_MASK_REASON_EXPLICIT);
 938        xen_evtchn_close(evtchn);
 939        xen_irq_info_cleanup(info);
 940}
 941
 942static void enable_pirq(struct irq_data *data)
 943{
 944        enable_dynirq(data);
 945}
 946
 947static void disable_pirq(struct irq_data *data)
 948{
 949        disable_dynirq(data);
 950}
 951
 952int xen_irq_from_gsi(unsigned gsi)
 953{
 954        struct irq_info *info;
 955
 956        list_for_each_entry(info, &xen_irq_list_head, list) {
 957                if (info->type != IRQT_PIRQ)
 958                        continue;
 959
 960                if (info->u.pirq.gsi == gsi)
 961                        return info->irq;
 962        }
 963
 964        return -1;
 965}
 966EXPORT_SYMBOL_GPL(xen_irq_from_gsi);
 967
 968static void __unbind_from_irq(unsigned int irq)
 969{
 970        evtchn_port_t evtchn = evtchn_from_irq(irq);
 971        struct irq_info *info = info_for_irq(irq);
 972
 973        if (info->refcnt > 0) {
 974                info->refcnt--;
 975                if (info->refcnt != 0)
 976                        return;
 977        }
 978
 979        if (VALID_EVTCHN(evtchn)) {
 980                unsigned int cpu = cpu_from_irq(irq);
 981                struct xenbus_device *dev;
 982
 983                xen_evtchn_close(evtchn);
 984
 985                switch (type_from_irq(irq)) {
 986                case IRQT_VIRQ:
 987                        per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1;
 988                        break;
 989                case IRQT_IPI:
 990                        per_cpu(ipi_to_irq, cpu)[ipi_from_irq(irq)] = -1;
 991                        break;
 992                case IRQT_EVTCHN:
 993                        dev = info->u.interdomain;
 994                        if (dev)
 995                                atomic_dec(&dev->event_channels);
 996                        break;
 997                default:
 998                        break;
 999                }
1000
1001                xen_irq_info_cleanup(info);
1002        }
1003
1004        xen_free_irq(irq);
1005}
1006
1007/*
1008 * Do not make any assumptions regarding the relationship between the
1009 * IRQ number returned here and the Xen pirq argument.
1010 *
1011 * Note: We don't assign an event channel until the irq has actually been
1012 * started up.  Return an existing irq if we've already got one for the gsi.
1013 *
1014 * Shareable implies level triggered, not shareable implies edge
1015 * triggered here.
1016 */
1017int xen_bind_pirq_gsi_to_irq(unsigned gsi,
1018                             unsigned pirq, int shareable, char *name)
1019{
1020        int irq;
1021        struct physdev_irq irq_op;
1022        int ret;
1023
1024        mutex_lock(&irq_mapping_update_lock);
1025
1026        irq = xen_irq_from_gsi(gsi);
1027        if (irq != -1) {
1028                pr_info("%s: returning irq %d for gsi %u\n",
1029                        __func__, irq, gsi);
1030                goto out;
1031        }
1032
1033        irq = xen_allocate_irq_gsi(gsi);
1034        if (irq < 0)
1035                goto out;
1036
1037        irq_op.irq = irq;
1038        irq_op.vector = 0;
1039
1040        /* Only the privileged domain can do this. For non-priv, the pcifront
1041         * driver provides a PCI bus that does the call to do exactly
1042         * this in the priv domain. */
1043        if (xen_initial_domain() &&
1044            HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
1045                xen_free_irq(irq);
1046                irq = -ENOSPC;
1047                goto out;
1048        }
1049
1050        ret = xen_irq_info_pirq_setup(irq, 0, pirq, gsi, DOMID_SELF,
1051                               shareable ? PIRQ_SHAREABLE : 0);
1052        if (ret < 0) {
1053                __unbind_from_irq(irq);
1054                irq = ret;
1055                goto out;
1056        }
1057
1058        pirq_query_unmask(irq);
1059        /* We try to use the handler with the appropriate semantic for the
1060         * type of interrupt: if the interrupt is an edge triggered
1061         * interrupt we use handle_edge_irq.
1062         *
1063         * On the other hand if the interrupt is level triggered we use
1064         * handle_fasteoi_irq like the native code does for this kind of
1065         * interrupts.
1066         *
1067         * Depending on the Xen version, pirq_needs_eoi might return true
1068         * not only for level triggered interrupts but for edge triggered
1069         * interrupts too. In any case Xen always honors the eoi mechanism,
1070         * not injecting any more pirqs of the same kind if the first one
1071         * hasn't received an eoi yet. Therefore using the fasteoi handler
1072         * is the right choice either way.
1073         */
1074        if (shareable)
1075                irq_set_chip_and_handler_name(irq, &xen_pirq_chip,
1076                                handle_fasteoi_irq, name);
1077        else
1078                irq_set_chip_and_handler_name(irq, &xen_pirq_chip,
1079                                handle_edge_irq, name);
1080
1081out:
1082        mutex_unlock(&irq_mapping_update_lock);
1083
1084        return irq;
1085}
1086
1087#ifdef CONFIG_PCI_MSI
1088int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc)
1089{
1090        int rc;
1091        struct physdev_get_free_pirq op_get_free_pirq;
1092
1093        op_get_free_pirq.type = MAP_PIRQ_TYPE_MSI;
1094        rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq);
1095
1096        WARN_ONCE(rc == -ENOSYS,
1097                  "hypervisor does not support the PHYSDEVOP_get_free_pirq interface\n");
1098
1099        return rc ? -1 : op_get_free_pirq.pirq;
1100}
1101
1102int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
1103                             int pirq, int nvec, const char *name, domid_t domid)
1104{
1105        int i, irq, ret;
1106
1107        mutex_lock(&irq_mapping_update_lock);
1108
1109        irq = xen_allocate_irqs_dynamic(nvec);
1110        if (irq < 0)
1111                goto out;
1112
1113        for (i = 0; i < nvec; i++) {
1114                irq_set_chip_and_handler_name(irq + i, &xen_pirq_chip, handle_edge_irq, name);
1115
1116                ret = xen_irq_info_pirq_setup(irq + i, 0, pirq + i, 0, domid,
1117                                              i == 0 ? 0 : PIRQ_MSI_GROUP);
1118                if (ret < 0)
1119                        goto error_irq;
1120        }
1121
1122        ret = irq_set_msi_desc(irq, msidesc);
1123        if (ret < 0)
1124                goto error_irq;
1125out:
1126        mutex_unlock(&irq_mapping_update_lock);
1127        return irq;
1128error_irq:
1129        while (nvec--)
1130                __unbind_from_irq(irq + nvec);
1131        mutex_unlock(&irq_mapping_update_lock);
1132        return ret;
1133}
1134#endif
1135
1136int xen_destroy_irq(int irq)
1137{
1138        struct physdev_unmap_pirq unmap_irq;
1139        struct irq_info *info = info_for_irq(irq);
1140        int rc = -ENOENT;
1141
1142        mutex_lock(&irq_mapping_update_lock);
1143
1144        /*
1145         * Only the first vector of an MSI group carries the PIRQ mapping,
1146         * so skip the PIRQ unmap for any vector other than the first one
1147         * in the group (those are flagged PIRQ_MSI_GROUP).
1148         */
1149        if (xen_initial_domain() && !(info->u.pirq.flags & PIRQ_MSI_GROUP)) {
1150                unmap_irq.pirq = info->u.pirq.pirq;
1151                unmap_irq.domid = info->u.pirq.domid;
1152                rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq);
1153                /* If another domain quits without making the pci_disable_msix
1154                 * call, the Xen hypervisor takes care of freeing the PIRQs
1155                 * (free_domain_pirqs).
1156                 */
1157                if ((rc == -ESRCH && info->u.pirq.domid != DOMID_SELF))
1158                        pr_info("domain %d does not have %d anymore\n",
1159                                info->u.pirq.domid, info->u.pirq.pirq);
1160                else if (rc) {
1161                        pr_warn("unmap irq failed %d\n", rc);
1162                        goto out;
1163                }
1164        }
1165
1166        xen_free_irq(irq);
1167
1168out:
1169        mutex_unlock(&irq_mapping_update_lock);
1170        return rc;
1171}
1172
1173int xen_irq_from_pirq(unsigned pirq)
1174{
1175        int irq;
1176
1177        struct irq_info *info;
1178
1179        mutex_lock(&irq_mapping_update_lock);
1180
1181        list_for_each_entry(info, &xen_irq_list_head, list) {
1182                if (info->type != IRQT_PIRQ)
1183                        continue;
1184                irq = info->irq;
1185                if (info->u.pirq.pirq == pirq)
1186                        goto out;
1187        }
1188        irq = -1;
1189out:
1190        mutex_unlock(&irq_mapping_update_lock);
1191
1192        return irq;
1193}
1194
1195
1196int xen_pirq_from_irq(unsigned irq)
1197{
1198        return pirq_from_irq(irq);
1199}
1200EXPORT_SYMBOL_GPL(xen_pirq_from_irq);
1201
1202static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip,
1203                                   struct xenbus_device *dev)
1204{
1205        int irq;
1206        int ret;
1207
1208        if (evtchn >= xen_evtchn_max_channels())
1209                return -ENOMEM;
1210
1211        mutex_lock(&irq_mapping_update_lock);
1212
1213        irq = get_evtchn_to_irq(evtchn);
1214
1215        if (irq == -1) {
1216                irq = xen_allocate_irq_dynamic();
1217                if (irq < 0)
1218                        goto out;
1219
1220                irq_set_chip_and_handler_name(irq, chip,
1221                                              handle_edge_irq, "event");
1222
1223                ret = xen_irq_info_evtchn_setup(irq, evtchn, dev);
1224                if (ret < 0) {
1225                        __unbind_from_irq(irq);
1226                        irq = ret;
1227                        goto out;
1228                }
1229                /*
1230                 * New interdomain events are initially bound to vCPU0. This
1231                 * is required to set up the event channel in the first
1232                 * place and is also important for UP guests, because the
1233                 * affinity setting is not invoked on them, so nothing would
1234                 * bind the channel.
1235                 */
1236                bind_evtchn_to_cpu(evtchn, 0, false);
1237        } else {
1238                struct irq_info *info = info_for_irq(irq);
1239                WARN_ON(info == NULL || info->type != IRQT_EVTCHN);
1240        }
1241
1242out:
1243        mutex_unlock(&irq_mapping_update_lock);
1244
1245        return irq;
1246}
1247
1248int bind_evtchn_to_irq(evtchn_port_t evtchn)
1249{
1250        return bind_evtchn_to_irq_chip(evtchn, &xen_dynamic_chip, NULL);
1251}
1252EXPORT_SYMBOL_GPL(bind_evtchn_to_irq);
1253
1254int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn)
1255{
1256        return bind_evtchn_to_irq_chip(evtchn, &xen_lateeoi_chip, NULL);
1257}
1258EXPORT_SYMBOL_GPL(bind_evtchn_to_irq_lateeoi);
1259
1260static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
1261{
1262        struct evtchn_bind_ipi bind_ipi;
1263        evtchn_port_t evtchn;
1264        int ret, irq;
1265
1266        mutex_lock(&irq_mapping_update_lock);
1267
1268        irq = per_cpu(ipi_to_irq, cpu)[ipi];
1269
1270        if (irq == -1) {
1271                irq = xen_allocate_irq_dynamic();
1272                if (irq < 0)
1273                        goto out;
1274
1275                irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
1276                                              handle_percpu_irq, "ipi");
1277
1278                bind_ipi.vcpu = xen_vcpu_nr(cpu);
1279                if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
1280                                                &bind_ipi) != 0)
1281                        BUG();
1282                evtchn = bind_ipi.port;
1283
1284                ret = xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
1285                if (ret < 0) {
1286                        __unbind_from_irq(irq);
1287                        irq = ret;
1288                        goto out;
1289                }
1290                /*
1291                 * Force the affinity mask to the target CPU so proc shows
1292                 * the correct target.
1293                 */
1294                bind_evtchn_to_cpu(evtchn, cpu, true);
1295        } else {
1296                struct irq_info *info = info_for_irq(irq);
1297                WARN_ON(info == NULL || info->type != IRQT_IPI);
1298        }
1299
1300 out:
1301        mutex_unlock(&irq_mapping_update_lock);
1302        return irq;
1303}
1304
1305static int bind_interdomain_evtchn_to_irq_chip(struct xenbus_device *dev,
1306                                               evtchn_port_t remote_port,
1307                                               struct irq_chip *chip)
1308{
1309        struct evtchn_bind_interdomain bind_interdomain;
1310        int err;
1311
1312        bind_interdomain.remote_dom  = dev->otherend_id;
1313        bind_interdomain.remote_port = remote_port;
1314
1315        err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
1316                                          &bind_interdomain);
1317
1318        return err ? : bind_evtchn_to_irq_chip(bind_interdomain.local_port,
1319                                               chip, dev);
1320}
1321
1322int bind_interdomain_evtchn_to_irq_lateeoi(struct xenbus_device *dev,
1323                                           evtchn_port_t remote_port)
1324{
1325        return bind_interdomain_evtchn_to_irq_chip(dev, remote_port,
1326                                                   &xen_lateeoi_chip);
1327}
1328EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq_lateeoi);
1329
1330static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn)
1331{
1332        struct evtchn_status status;
1333        evtchn_port_t port;
1334        int rc = -ENOENT;
1335
1336        memset(&status, 0, sizeof(status));
1337        for (port = 0; port < xen_evtchn_max_channels(); port++) {
1338                status.dom = DOMID_SELF;
1339                status.port = port;
1340                rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status);
1341                if (rc < 0)
1342                        continue;
1343                if (status.status != EVTCHNSTAT_virq)
1344                        continue;
1345                if (status.u.virq == virq && status.vcpu == xen_vcpu_nr(cpu)) {
1346                        *evtchn = port;
1347                        break;
1348                }
1349        }
1350        return rc;
1351}
1352
1353/**
1354 * xen_evtchn_nr_channels - number of usable event channel ports
1355 *
1356 * This may be less than the maximum supported by the current
1357 * hypervisor ABI. Use xen_evtchn_max_channels() for the maximum
1358 * supported.
1359 */
1360unsigned xen_evtchn_nr_channels(void)
1361{
1362        return evtchn_ops->nr_channels();
1363}
1364EXPORT_SYMBOL_GPL(xen_evtchn_nr_channels);
1365
1366int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu)
1367{
1368        struct evtchn_bind_virq bind_virq;
1369        evtchn_port_t evtchn = 0;
1370        int irq, ret;
1371
1372        mutex_lock(&irq_mapping_update_lock);
1373
1374        irq = per_cpu(virq_to_irq, cpu)[virq];
1375
1376        if (irq == -1) {
1377                irq = xen_allocate_irq_dynamic();
1378                if (irq < 0)
1379                        goto out;
1380
1381                if (percpu)
1382                        irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
1383                                                      handle_percpu_irq, "virq");
1384                else
1385                        irq_set_chip_and_handler_name(irq, &xen_dynamic_chip,
1386                                                      handle_edge_irq, "virq");
1387
1388                bind_virq.virq = virq;
1389                bind_virq.vcpu = xen_vcpu_nr(cpu);
1390                ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
1391                                                &bind_virq);
1392                if (ret == 0)
1393                        evtchn = bind_virq.port;
1394                else {
1395                        if (ret == -EEXIST)
1396                                ret = find_virq(virq, cpu, &evtchn);
1397                        BUG_ON(ret < 0);
1398                }
1399
1400                ret = xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
1401                if (ret < 0) {
1402                        __unbind_from_irq(irq);
1403                        irq = ret;
1404                        goto out;
1405                }
1406
1407                /*
1408                 * Force the affinity mask for percpu interrupts so proc
1409                 * shows the correct target.
1410                 */
1411                bind_evtchn_to_cpu(evtchn, cpu, percpu);
1412        } else {
1413                struct irq_info *info = info_for_irq(irq);
1414                WARN_ON(info == NULL || info->type != IRQT_VIRQ);
1415        }
1416
1417out:
1418        mutex_unlock(&irq_mapping_update_lock);
1419
1420        return irq;
1421}
1422
1423static void unbind_from_irq(unsigned int irq)
1424{
1425        mutex_lock(&irq_mapping_update_lock);
1426        __unbind_from_irq(irq);
1427        mutex_unlock(&irq_mapping_update_lock);
1428}
1429
1430static int bind_evtchn_to_irqhandler_chip(evtchn_port_t evtchn,
1431                                          irq_handler_t handler,
1432                                          unsigned long irqflags,
1433                                          const char *devname, void *dev_id,
1434                                          struct irq_chip *chip)
1435{
1436        int irq, retval;
1437
1438        irq = bind_evtchn_to_irq_chip(evtchn, chip, NULL);
1439        if (irq < 0)
1440                return irq;
1441        retval = request_irq(irq, handler, irqflags, devname, dev_id);
1442        if (retval != 0) {
1443                unbind_from_irq(irq);
1444                return retval;
1445        }
1446
1447        return irq;
1448}
1449
1450int bind_evtchn_to_irqhandler(evtchn_port_t evtchn,
1451                              irq_handler_t handler,
1452                              unsigned long irqflags,
1453                              const char *devname, void *dev_id)
1454{
1455        return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags,
1456                                              devname, dev_id,
1457                                              &xen_dynamic_chip);
1458}
1459EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler);
1460
1461int bind_evtchn_to_irqhandler_lateeoi(evtchn_port_t evtchn,
1462                                      irq_handler_t handler,
1463                                      unsigned long irqflags,
1464                                      const char *devname, void *dev_id)
1465{
1466        return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags,
1467                                              devname, dev_id,
1468                                              &xen_lateeoi_chip);
1469}
1470EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler_lateeoi);
1471
1472static int bind_interdomain_evtchn_to_irqhandler_chip(
1473                struct xenbus_device *dev, evtchn_port_t remote_port,
1474                irq_handler_t handler, unsigned long irqflags,
1475                const char *devname, void *dev_id, struct irq_chip *chip)
1476{
1477        int irq, retval;
1478
1479        irq = bind_interdomain_evtchn_to_irq_chip(dev, remote_port, chip);
1480        if (irq < 0)
1481                return irq;
1482
1483        retval = request_irq(irq, handler, irqflags, devname, dev_id);
1484        if (retval != 0) {
1485                unbind_from_irq(irq);
1486                return retval;
1487        }
1488
1489        return irq;
1490}
1491
1492int bind_interdomain_evtchn_to_irqhandler_lateeoi(struct xenbus_device *dev,
1493                                                  evtchn_port_t remote_port,
1494                                                  irq_handler_t handler,
1495                                                  unsigned long irqflags,
1496                                                  const char *devname,
1497                                                  void *dev_id)
1498{
1499        return bind_interdomain_evtchn_to_irqhandler_chip(dev,
1500                                remote_port, handler, irqflags, devname,
1501                                dev_id, &xen_lateeoi_chip);
1502}
1503EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler_lateeoi);
1504
1505int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
1506                            irq_handler_t handler,
1507                            unsigned long irqflags, const char *devname, void *dev_id)
1508{
1509        int irq, retval;
1510
1511        irq = bind_virq_to_irq(virq, cpu, irqflags & IRQF_PERCPU);
1512        if (irq < 0)
1513                return irq;
1514        retval = request_irq(irq, handler, irqflags, devname, dev_id);
1515        if (retval != 0) {
1516                unbind_from_irq(irq);
1517                return retval;
1518        }
1519
1520        return irq;
1521}
1522EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler);
1523
1524int bind_ipi_to_irqhandler(enum ipi_vector ipi,
1525                           unsigned int cpu,
1526                           irq_handler_t handler,
1527                           unsigned long irqflags,
1528                           const char *devname,
1529                           void *dev_id)
1530{
1531        int irq, retval;
1532
1533        irq = bind_ipi_to_irq(ipi, cpu);
1534        if (irq < 0)
1535                return irq;
1536
1537        irqflags |= IRQF_NO_SUSPEND | IRQF_FORCE_RESUME | IRQF_EARLY_RESUME;
1538        retval = request_irq(irq, handler, irqflags, devname, dev_id);
1539        if (retval != 0) {
1540                unbind_from_irq(irq);
1541                return retval;
1542        }
1543
1544        return irq;
1545}
1546
1547void unbind_from_irqhandler(unsigned int irq, void *dev_id)
1548{
1549        struct irq_info *info = info_for_irq(irq);
1550
1551        if (WARN_ON(!info))
1552                return;
1553        free_irq(irq, dev_id);
1554        unbind_from_irq(irq);
1555}
1556EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
1557
1558/**
1559 * xen_set_irq_priority() - set an event channel priority.
1560 * @irq:irq bound to an event channel.
1561 * @priority: priority between XEN_IRQ_PRIORITY_MAX and XEN_IRQ_PRIORITY_MIN.
1562 */
1563int xen_set_irq_priority(unsigned irq, unsigned priority)
1564{
1565        struct evtchn_set_priority set_priority;
1566
1567        set_priority.port = evtchn_from_irq(irq);
1568        set_priority.priority = priority;
1569
1570        return HYPERVISOR_event_channel_op(EVTCHNOP_set_priority,
1571                                           &set_priority);
1572}
1573EXPORT_SYMBOL_GPL(xen_set_irq_priority);
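
/*
 * Editor's note (illustrative): a caller can raise a latency-sensitive
 * source, e.g. a timer VIRQ, above ordinary events with
 * xen_set_irq_priority(irq, XEN_IRQ_PRIORITY_MAX); the hypercall fails
 * on event channel ABIs without priority support.
 */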
1574
1575int evtchn_make_refcounted(evtchn_port_t evtchn)
1576{
1577        int irq = get_evtchn_to_irq(evtchn);
1578        struct irq_info *info;
1579
1580        if (irq == -1)
1581                return -ENOENT;
1582
1583        info = info_for_irq(irq);
1584
1585        if (!info)
1586                return -ENOENT;
1587
1588        WARN_ON(info->refcnt != -1);
1589
1590        info->refcnt = 1;
1591
1592        return 0;
1593}
1594EXPORT_SYMBOL_GPL(evtchn_make_refcounted);
1595
1596int evtchn_get(evtchn_port_t evtchn)
1597{
1598        int irq;
1599        struct irq_info *info;
1600        int err = -ENOENT;
1601
1602        if (evtchn >= xen_evtchn_max_channels())
1603                return -EINVAL;
1604
1605        mutex_lock(&irq_mapping_update_lock);
1606
1607        irq = get_evtchn_to_irq(evtchn);
1608        if (irq == -1)
1609                goto done;
1610
1611        info = info_for_irq(irq);
1612
1613        if (!info)
1614                goto done;
1615
1616        err = -EINVAL;
1617        if (info->refcnt <= 0 || info->refcnt == SHRT_MAX)
1618                goto done;
1619
1620        info->refcnt++;
1621        err = 0;
1622 done:
1623        mutex_unlock(&irq_mapping_update_lock);
1624
1625        return err;
1626}
1627EXPORT_SYMBOL_GPL(evtchn_get);
1628
1629void evtchn_put(evtchn_port_t evtchn)
1630{
1631        int irq = get_evtchn_to_irq(evtchn);
1632        if (WARN_ON(irq == -1))
1633                return;
1634        unbind_from_irq(irq);
1635}
1636EXPORT_SYMBOL_GPL(evtchn_put);
1637
1638void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
1639{
1640        int irq;
1641
1642#ifdef CONFIG_X86
1643        if (unlikely(vector == XEN_NMI_VECTOR)) {
1644                int rc =  HYPERVISOR_vcpu_op(VCPUOP_send_nmi, xen_vcpu_nr(cpu),
1645                                             NULL);
1646                if (rc < 0)
1647                        printk(KERN_WARNING "Sending nmi to CPU%d failed (rc:%d)\n", cpu, rc);
1648                return;
1649        }
1650#endif
1651        irq = per_cpu(ipi_to_irq, cpu)[vector];
1652        BUG_ON(irq < 0);
1653        notify_remote_via_irq(irq);
1654}
1655
1656struct evtchn_loop_ctrl {
1657        ktime_t timeout;
1658        unsigned count;
1659        bool defer_eoi;
1660};
1661
1662void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
1663{
1664        int irq;
1665        struct irq_info *info;
1666        struct xenbus_device *dev;
1667
1668        irq = get_evtchn_to_irq(port);
1669        if (irq == -1)
1670                return;
1671
1672        /*
1673         * Check for timeout every 256 events.
1674         * We are setting the timeout value only after the first 256
1675         * events in order to not hurt the common case of few loop
1676         * iterations. The 256 is basically an arbitrary value.
1677         *
1678         * If we are hitting the timeout we need to defer all further EOIs
1679         * in order to make sure we leave the event handling loop sooner
1680         * rather than later.
1681         */
1682        if (!ctrl->defer_eoi && !(++ctrl->count & 0xff)) {
1683                ktime_t kt = ktime_get();
1684
1685                if (!ctrl->timeout) {
1686                        kt = ktime_add_ms(kt,
1687                                          jiffies_to_msecs(event_loop_timeout));
1688                        ctrl->timeout = kt;
1689                } else if (kt > ctrl->timeout) {
1690                        ctrl->defer_eoi = true;
1691                }
1692        }
1693
1694        info = info_for_irq(irq);
1695        if (xchg_acquire(&info->is_active, 1))
1696                return;
1697
1698        dev = (info->type == IRQT_EVTCHN) ? info->u.interdomain : NULL;
1699        if (dev)
1700                atomic_inc(&dev->events);
1701
1702        if (ctrl->defer_eoi) {
1703                info->eoi_cpu = smp_processor_id();
1704                info->irq_epoch = __this_cpu_read(irq_epoch);
1705                info->eoi_time = get_jiffies_64() + event_eoi_delay;
1706        }
1707
1708        generic_handle_irq(irq);
1709}
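
/*
 * Illustrative sketch (not part of the original file): how an event
 * channel ABI backend is expected to drive handle_irq_for_port().  The
 * same evtchn_loop_ctrl is threaded through the whole upcall, so the
 * "check the clock every 256 events" accounting above spans all ports
 * handled in one upcall.  "example_scan_pending_ports" is a
 * hypothetical name; the next_pending callback (assumed to return 0
 * when no port is pending) stands in for the ABI-specific scan.
 */
static void example_scan_pending_ports(struct evtchn_loop_ctrl *ctrl,
                                       evtchn_port_t (*next_pending)(void))
{
        evtchn_port_t port;

        while ((port = next_pending()) != 0)
                handle_irq_for_port(port, ctrl);
}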
1710
1711static void __xen_evtchn_do_upcall(void)
1712{
1713        struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
1714        int cpu = smp_processor_id();
1715        struct evtchn_loop_ctrl ctrl = { 0 };
1716
1717        read_lock(&evtchn_rwlock);
1718
1719        do {
1720                vcpu_info->evtchn_upcall_pending = 0;
1721
1722                xen_evtchn_handle_events(cpu, &ctrl);
1723
1724                BUG_ON(!irqs_disabled());
1725
1726                virt_rmb(); /* Hypervisor can set upcall pending. */
1727
1728        } while (vcpu_info->evtchn_upcall_pending);
1729
1730        read_unlock(&evtchn_rwlock);
1731
1732        /*
1733         * Increment irq_epoch only now, so that EOIs are deferred only
1734         * for xen_irq_lateeoi() invocations occurring from inside the
1735         * loop above.
1736         */
1737        __this_cpu_inc(irq_epoch);
1738}
1739
1740void xen_evtchn_do_upcall(struct pt_regs *regs)
1741{
1742        struct pt_regs *old_regs = set_irq_regs(regs);
1743
1744        irq_enter();
1745
1746        __xen_evtchn_do_upcall();
1747
1748        irq_exit();
1749        set_irq_regs(old_regs);
1750}
1751
1752void xen_hvm_evtchn_do_upcall(void)
1753{
1754        __xen_evtchn_do_upcall();
1755}
1756EXPORT_SYMBOL_GPL(xen_hvm_evtchn_do_upcall);
1757
1758/* Rebind a new event channel to an existing irq. */
1759void rebind_evtchn_irq(evtchn_port_t evtchn, int irq)
1760{
1761        struct irq_info *info = info_for_irq(irq);
1762
1763        if (WARN_ON(!info))
1764                return;
1765
1766        /* Make sure the irq is masked, since the new event channel
1767           will also be masked. */
1768        disable_irq(irq);
1769
1770        mutex_lock(&irq_mapping_update_lock);
1771
1772        /* After resume the irq<->evtchn mappings are all cleared out */
1773        BUG_ON(get_evtchn_to_irq(evtchn) != -1);
1774        /* Expect irq to have been bound before,
1775           so there should be a proper type */
1776        BUG_ON(info->type == IRQT_UNBOUND);
1777
1778        (void)xen_irq_info_evtchn_setup(irq, evtchn, NULL);
1779
1780        mutex_unlock(&irq_mapping_update_lock);
1781
1782        bind_evtchn_to_cpu(evtchn, info->cpu, false);
1783
1784        /* Unmask the event channel. */
1785        enable_irq(irq);
1786}
1787
1788/* Rebind an evtchn so that it gets delivered to a specific cpu */
1789static int xen_rebind_evtchn_to_cpu(struct irq_info *info, unsigned int tcpu)
1790{
1791        struct evtchn_bind_vcpu bind_vcpu;
1792        evtchn_port_t evtchn = info ? info->evtchn : 0;
1793
1794        if (!VALID_EVTCHN(evtchn))
1795                return -1;
1796
1797        if (!xen_support_evtchn_rebind())
1798                return -1;
1799
1800        /* Send future instances of this interrupt to the target vcpu. */
1801        bind_vcpu.port = evtchn;
1802        bind_vcpu.vcpu = xen_vcpu_nr(tcpu);
1803
1804        /*
1805         * Mask the event while changing the VCPU binding to prevent
1806         * it being delivered on an unexpected VCPU.
1807         */
1808        do_mask(info, EVT_MASK_REASON_TEMPORARY);
1809
1810        /*
1811         * If this fails, it usually just indicates that we're dealing with a
1812         * virq or IPI channel, which doesn't actually need to be rebound.
1813         * Ignore it, but don't do the xenlinux-level rebind in that case.
1814         */
1815        if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
1816                bind_evtchn_to_cpu(evtchn, tcpu, false);
1817
1818        do_unmask(info, EVT_MASK_REASON_TEMPORARY);
1819
1820        return 0;
1821}
1822
1823/*
1824 * Find the CPU within @dest mask which has the least number of channels
1825 * assigned. This is not precise as the per cpu counts can be modified
1826 * concurrently.
1827 */
1828static unsigned int select_target_cpu(const struct cpumask *dest)
1829{
1830        unsigned int cpu, best_cpu = UINT_MAX, minch = UINT_MAX;
1831
1832        for_each_cpu_and(cpu, dest, cpu_online_mask) {
1833                unsigned int curch = atomic_read(&channels_on_cpu[cpu]);
1834
1835                if (curch < minch) {
1836                        minch = curch;
1837                        best_cpu = cpu;
1838                }
1839        }
1840
1841        /*
1842         * Catch the unlikely case that dest contains no online CPUs. The
1843         * recursive call with cpu_online_mask can't recurse any further.
1844         */
1845        if (best_cpu == UINT_MAX)
1846                return select_target_cpu(cpu_online_mask);
1847
1848        return best_cpu;
1849}
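
/*
 * Worked example (not part of the original file): with
 * channels_on_cpu = { cpu0: 7, cpu1: 3, cpu2: 5 } and dest containing
 * cpu1 and cpu2 (both online), cpu1 is picked as it carries the fewest
 * channels.  If dest contains no online CPU at all, the fallback pass
 * over cpu_online_mask picks the least loaded online CPU instead.
 */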
1850
1851static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
1852                            bool force)
1853{
1854        unsigned int tcpu = select_target_cpu(dest);
1855        int ret;
1856
1857        ret = xen_rebind_evtchn_to_cpu(info_for_irq(data->irq), tcpu);
1858        if (!ret)
1859                irq_data_update_effective_affinity(data, cpumask_of(tcpu));
1860
1861        return ret;
1862}
1863
1864static void enable_dynirq(struct irq_data *data)
1865{
1866        struct irq_info *info = info_for_irq(data->irq);
1867        evtchn_port_t evtchn = info ? info->evtchn : 0;
1868
1869        if (VALID_EVTCHN(evtchn))
1870                do_unmask(info, EVT_MASK_REASON_EXPLICIT);
1871}
1872
1873static void disable_dynirq(struct irq_data *data)
1874{
1875        struct irq_info *info = info_for_irq(data->irq);
1876        evtchn_port_t evtchn = info ? info->evtchn : 0;
1877
1878        if (VALID_EVTCHN(evtchn))
1879                do_mask(info, EVT_MASK_REASON_EXPLICIT);
1880}
1881
1882static void ack_dynirq(struct irq_data *data)
1883{
1884        struct irq_info *info = info_for_irq(data->irq);
1885        evtchn_port_t evtchn = info ? info->evtchn : 0;
1886
1887        if (VALID_EVTCHN(evtchn))
1888                event_handler_exit(info);
1889}
1890
1891static void mask_ack_dynirq(struct irq_data *data)
1892{
1893        disable_dynirq(data);
1894        ack_dynirq(data);
1895}
1896
1897static void lateeoi_ack_dynirq(struct irq_data *data)
1898{
1899        struct irq_info *info = info_for_irq(data->irq);
1900        evtchn_port_t evtchn = info ? info->evtchn : 0;
1901
1902        if (VALID_EVTCHN(evtchn)) {
1903                do_mask(info, EVT_MASK_REASON_EOI_PENDING);
1904                /*
1905                 * Don't call event_handler_exit().
1906                 * Need to keep is_active non-zero in order to ignore re-raised
1907                 * events after cpu affinity changes while a lateeoi is pending.
1908                 */
1909                clear_evtchn(evtchn);
1910        }
1911}
1912
1913static void lateeoi_mask_ack_dynirq(struct irq_data *data)
1914{
1915        struct irq_info *info = info_for_irq(data->irq);
1916        evtchn_port_t evtchn = info ? info->evtchn : 0;
1917
1918        if (VALID_EVTCHN(evtchn)) {
1919                do_mask(info, EVT_MASK_REASON_EXPLICIT);
1920                event_handler_exit(info);
1921        }
1922}
1923
1924static int retrigger_dynirq(struct irq_data *data)
1925{
1926        struct irq_info *info = info_for_irq(data->irq);
1927        evtchn_port_t evtchn = info ? info->evtchn : 0;
1928
1929        if (!VALID_EVTCHN(evtchn))
1930                return 0;
1931
1932        do_mask(info, EVT_MASK_REASON_TEMPORARY);
1933        set_evtchn(evtchn);
1934        do_unmask(info, EVT_MASK_REASON_TEMPORARY);
1935
1936        return 1;
1937}
1938
1939static void restore_pirqs(void)
1940{
1941        int pirq, rc, irq, gsi;
1942        struct physdev_map_pirq map_irq;
1943        struct irq_info *info;
1944
1945        list_for_each_entry(info, &xen_irq_list_head, list) {
1946                if (info->type != IRQT_PIRQ)
1947                        continue;
1948
1949                pirq = info->u.pirq.pirq;
1950                gsi = info->u.pirq.gsi;
1951                irq = info->irq;
1952
1953                /* Save/restore of PT devices doesn't work, so at this point
1954                 * the only devices present are GSI-based emulated devices. */
1955                if (!gsi)
1956                        continue;
1957
1958                map_irq.domid = DOMID_SELF;
1959                map_irq.type = MAP_PIRQ_TYPE_GSI;
1960                map_irq.index = gsi;
1961                map_irq.pirq = pirq;
1962
1963                rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
1964                if (rc) {
1965                        pr_warn("xen map irq failed gsi=%d irq=%d pirq=%d rc=%d\n",
1966                                gsi, irq, pirq, rc);
1967                        xen_free_irq(irq);
1968                        continue;
1969                }
1970
1971                printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq);
1972
1973                __startup_pirq(irq);
1974        }
1975}
1976
1977static void restore_cpu_virqs(unsigned int cpu)
1978{
1979        struct evtchn_bind_virq bind_virq;
1980        evtchn_port_t evtchn;
1981        int virq, irq;
1982
1983        for (virq = 0; virq < NR_VIRQS; virq++) {
1984                if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1)
1985                        continue;
1986
1987                BUG_ON(virq_from_irq(irq) != virq);
1988
1989                /* Get a new binding from Xen. */
1990                bind_virq.virq = virq;
1991                bind_virq.vcpu = xen_vcpu_nr(cpu);
1992                if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
1993                                                &bind_virq) != 0)
1994                        BUG();
1995                evtchn = bind_virq.port;
1996
1997                /* Record the new mapping. */
1998                (void)xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
1999                /* The affinity mask is still valid */
2000                bind_evtchn_to_cpu(evtchn, cpu, false);
2001        }
2002}
2003
2004static void restore_cpu_ipis(unsigned int cpu)
2005{
2006        struct evtchn_bind_ipi bind_ipi;
2007        evtchn_port_t evtchn;
2008        int ipi, irq;
2009
2010        for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) {
2011                if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1)
2012                        continue;
2013
2014                BUG_ON(ipi_from_irq(irq) != ipi);
2015
2016                /* Get a new binding from Xen. */
2017                bind_ipi.vcpu = xen_vcpu_nr(cpu);
2018                if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
2019                                                &bind_ipi) != 0)
2020                        BUG();
2021                evtchn = bind_ipi.port;
2022
2023                /* Record the new mapping. */
2024                (void)xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
2025                /* The affinity mask is still valid */
2026                bind_evtchn_to_cpu(evtchn, cpu, false);
2027        }
2028}
2029
2030/* Clear an irq's pending state, in preparation for polling on it */
2031void xen_clear_irq_pending(int irq)
2032{
2033        struct irq_info *info = info_for_irq(irq);
2034        evtchn_port_t evtchn = info ? info->evtchn : 0;
2035
2036        if (VALID_EVTCHN(evtchn))
2037                event_handler_exit(info);
2038}
2039EXPORT_SYMBOL(xen_clear_irq_pending);

2040void xen_set_irq_pending(int irq)
2041{
2042        evtchn_port_t evtchn = evtchn_from_irq(irq);
2043
2044        if (VALID_EVTCHN(evtchn))
2045                set_evtchn(evtchn);
2046}
2047
2048bool xen_test_irq_pending(int irq)
2049{
2050        evtchn_port_t evtchn = evtchn_from_irq(irq);
2051        bool ret = false;
2052
2053        if (VALID_EVTCHN(evtchn))
2054                ret = test_evtchn(evtchn);
2055
2056        return ret;
2057}
2058
2059/* Poll waiting for an irq to become pending, with a timeout.  In the usual
2060 * case, the irq will be disabled so it won't deliver an interrupt. */
2061void xen_poll_irq_timeout(int irq, u64 timeout)
2062{
2063        evtchn_port_t evtchn = evtchn_from_irq(irq);
2064
2065        if (VALID_EVTCHN(evtchn)) {
2066                struct sched_poll poll;
2067
2068                poll.nr_ports = 1;
2069                poll.timeout = timeout;
2070                set_xen_guest_handle(poll.ports, &evtchn);
2071
2072                if (HYPERVISOR_sched_op(SCHEDOP_poll, &poll) != 0)
2073                        BUG();
2074        }
2075}
2076EXPORT_SYMBOL(xen_poll_irq_timeout);

2077/* Poll waiting for an irq to become pending.  In the usual case, the
2078 * irq will be disabled so it won't deliver an interrupt. */
2079void xen_poll_irq(int irq)
2080{
2081        xen_poll_irq_timeout(irq, 0 /* no timeout */);
2082}
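
/*
 * Illustrative sketch (not part of the original file): the intended
 * clear-then-poll pattern around xen_poll_irq(), similar to what the PV
 * spinlock wait path does.  "example_wait_for_event" and the condition
 * callback are hypothetical.
 */
static void example_wait_for_event(int irq, bool (*condition)(void))
{
        /* Clear any stale pending state before checking the condition. */
        xen_clear_irq_pending(irq);

        /* Re-check after clearing so a wakeup cannot be missed. */
        if (condition())
                return;

        /* Block in the hypervisor until the (masked) irq becomes pending. */
        xen_poll_irq(irq);
}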
2083
2084/* Check whether the IRQ line is shared with other guests. */
2085int xen_test_irq_shared(int irq)
2086{
2087        struct irq_info *info = info_for_irq(irq);
2088        struct physdev_irq_status_query irq_status;
2089
2090        if (WARN_ON(!info))
2091                return -ENOENT;
2092
2093        irq_status.irq = info->u.pirq.pirq;
2094
2095        if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
2096                return 0;
2097        return !(irq_status.flags & XENIRQSTAT_shared);
2098}
2099EXPORT_SYMBOL_GPL(xen_test_irq_shared);
2100
2101void xen_irq_resume(void)
2102{
2103        unsigned int cpu;
2104        struct irq_info *info;
2105
2106        /* New event-channel space is not 'live' yet. */
2107        xen_evtchn_resume();
2108
2109        /* No IRQ <-> event-channel mappings. */
2110        list_for_each_entry(info, &xen_irq_list_head, list) {
2111                /* Zap event-channel binding */
2112                info->evtchn = 0;
2113                /* Adjust accounting */
2114                channels_on_cpu_dec(info);
2115        }
2116
2117        clear_evtchn_to_irq_all();
2118
2119        for_each_possible_cpu(cpu) {
2120                restore_cpu_virqs(cpu);
2121                restore_cpu_ipis(cpu);
2122        }
2123
2124        restore_pirqs();
2125}
2126
2127static struct irq_chip xen_dynamic_chip __read_mostly = {
2128        .name                   = "xen-dyn",
2129
2130        .irq_disable            = disable_dynirq,
2131        .irq_mask               = disable_dynirq,
2132        .irq_unmask             = enable_dynirq,
2133
2134        .irq_ack                = ack_dynirq,
2135        .irq_mask_ack           = mask_ack_dynirq,
2136
2137        .irq_set_affinity       = set_affinity_irq,
2138        .irq_retrigger          = retrigger_dynirq,
2139};
2140
2141static struct irq_chip xen_lateeoi_chip __read_mostly = {
2142        /* The chip name needs to contain "xen-dyn" for irqbalance to work. */
2143        .name                   = "xen-dyn-lateeoi",
2144
2145        .irq_disable            = disable_dynirq,
2146        .irq_mask               = disable_dynirq,
2147        .irq_unmask             = enable_dynirq,
2148
2149        .irq_ack                = lateeoi_ack_dynirq,
2150        .irq_mask_ack           = lateeoi_mask_ack_dynirq,
2151
2152        .irq_set_affinity       = set_affinity_irq,
2153        .irq_retrigger          = retrigger_dynirq,
2154};
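
/*
 * Illustrative sketch (not part of the original file): how a backend
 * driver is expected to use the lateeoi chip.  The event channel is
 * bound with e.g. bind_evtchn_to_irqhandler_lateeoi(), and the handler
 * signals the end of event processing with xen_irq_lateeoi(), optionally
 * flagging the event as spurious.  "example_lateeoi_handler" is a
 * hypothetical handler, not part of this file.
 */
static irqreturn_t example_lateeoi_handler(int irq, void *dev_id)
{
        /* ... process the event, noting whether any real work was found ... */
        bool spurious = false;

        /* Tell the lateeoi machinery that event processing has finished. */
        xen_irq_lateeoi(irq, spurious ? XEN_EOI_FLAG_SPURIOUS : 0);

        return IRQ_HANDLED;
}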
2155
2156static struct irq_chip xen_pirq_chip __read_mostly = {
2157        .name                   = "xen-pirq",
2158
2159        .irq_startup            = startup_pirq,
2160        .irq_shutdown           = shutdown_pirq,
2161        .irq_enable             = enable_pirq,
2162        .irq_disable            = disable_pirq,
2163
2164        .irq_mask               = disable_dynirq,
2165        .irq_unmask             = enable_dynirq,
2166
2167        .irq_ack                = eoi_pirq,
2168        .irq_eoi                = eoi_pirq,
2169        .irq_mask_ack           = mask_ack_pirq,
2170
2171        .irq_set_affinity       = set_affinity_irq,
2172
2173        .irq_retrigger          = retrigger_dynirq,
2174};
2175
2176static struct irq_chip xen_percpu_chip __read_mostly = {
2177        .name                   = "xen-percpu",
2178
2179        .irq_disable            = disable_dynirq,
2180        .irq_mask               = disable_dynirq,
2181        .irq_unmask             = enable_dynirq,
2182
2183        .irq_ack                = ack_dynirq,
2184};
2185
2186#ifdef CONFIG_XEN_PVHVM
2187/* Vector callbacks are better than PCI interrupts for receiving event
2188 * channel notifications, because we can receive vector callbacks on any
2189 * vcpu and we don't need PCI support or APIC interactions. */
2190void xen_setup_callback_vector(void)
2191{
2192        uint64_t callback_via;
2193
2194        if (xen_have_vector_callback) {
2195                callback_via = HVM_CALLBACK_VECTOR(HYPERVISOR_CALLBACK_VECTOR);
2196                if (xen_set_callback_via(callback_via)) {
2197                        pr_err("Request for Xen HVM callback vector failed\n");
2198                        xen_have_vector_callback = 0;
2199                }
2200        }
2201}
2202
2203static __init void xen_alloc_callback_vector(void)
2204{
2205        if (!xen_have_vector_callback)
2206                return;
2207
2208        pr_info("Xen HVM callback vector for event delivery is enabled\n");
2209        alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_xen_hvm_callback);
2210}
2211#else
2212void xen_setup_callback_vector(void) {}
2213static inline void xen_alloc_callback_vector(void) {}
2214#endif
2215
2216bool xen_fifo_events = true;
2217module_param_named(fifo_events, xen_fifo_events, bool, 0);
2218
2219static int xen_evtchn_cpu_prepare(unsigned int cpu)
2220{
2221        int ret = 0;
2222
2223        xen_cpu_init_eoi(cpu);
2224
2225        if (evtchn_ops->percpu_init)
2226                ret = evtchn_ops->percpu_init(cpu);
2227
2228        return ret;
2229}
2230
2231static int xen_evtchn_cpu_dead(unsigned int cpu)
2232{
2233        int ret = 0;
2234
2235        if (evtchn_ops->percpu_deinit)
2236                ret = evtchn_ops->percpu_deinit(cpu);
2237
2238        return ret;
2239}
2240
2241void __init xen_init_IRQ(void)
2242{
2243        int ret = -EINVAL;
2244        evtchn_port_t evtchn;
2245
2246        if (xen_fifo_events)
2247                ret = xen_evtchn_fifo_init();
2248        if (ret < 0) {
2249                xen_evtchn_2l_init();
2250                xen_fifo_events = false;
2251        }
2252
2253        xen_cpu_init_eoi(smp_processor_id());
2254
2255        cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE,
2256                                  "xen/evtchn:prepare",
2257                                  xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead);
2258
2259        evtchn_to_irq = kcalloc(EVTCHN_ROW(xen_evtchn_max_channels()),
2260                                sizeof(*evtchn_to_irq), GFP_KERNEL);
2261        BUG_ON(!evtchn_to_irq);
2262
2263        /* No event channels are 'live' right now. */
2264        for (evtchn = 0; evtchn < xen_evtchn_nr_channels(); evtchn++)
2265                mask_evtchn(evtchn);
2266
2267        pirq_needs_eoi = pirq_needs_eoi_flag;
2268
2269#ifdef CONFIG_X86
2270        if (xen_pv_domain()) {
2271                if (xen_initial_domain())
2272                        pci_xen_initial_domain();
2273        }
2274        if (xen_feature(XENFEAT_hvm_callback_vector)) {
2275                xen_setup_callback_vector();
2276                xen_alloc_callback_vector();
2277        }
2278
2279        if (xen_hvm_domain()) {
2280                native_init_IRQ();
2281                /* pci_xen_hvm_init must be called after native_init_IRQ so that
2282                 * __acpi_register_gsi can point at the right function */
2283                pci_xen_hvm_init();
2284        } else {
2285                int rc;
2286                struct physdev_pirq_eoi_gmfn eoi_gmfn;
2287
2288                pirq_eoi_map = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
2289                eoi_gmfn.gmfn = virt_to_gfn(pirq_eoi_map);
2290                rc = HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, &eoi_gmfn);
2291                if (rc != 0) {
2292                        free_page((unsigned long) pirq_eoi_map);
2293                        pirq_eoi_map = NULL;
2294                } else
2295                        pirq_needs_eoi = pirq_check_eoi_map;
2296        }
2297#endif
2298}
2299