linux/arch/x86/hyperv/irqdomain.c
// SPDX-License-Identifier: GPL-2.0

/*
 * Irqdomain for Linux to run as the root partition on Microsoft Hypervisor.
 *
 * Authors:
 *  Sunil Muthuswamy <sunilmut@microsoft.com>
 *  Wei Liu <wei.liu@kernel.org>
 */

#include <linux/pci.h>
#include <linux/irq.h>
#include <asm/mshyperv.h>

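/*
 * Map an interrupt for @device_id to @vector on @cpu via the
 * HVCALL_MAP_DEVICE_INTERRUPT hypercall.  On success the hypervisor
 * fills in @entry, which the caller must keep in order to unmap the
 * interrupt later.
 */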
static int hv_map_interrupt(union hv_device_id device_id, bool level,
		int cpu, int vector, struct hv_interrupt_entry *entry)
{
	struct hv_input_map_device_interrupt *input;
	struct hv_output_map_device_interrupt *output;
	struct hv_device_interrupt_descriptor *intr_desc;
	unsigned long flags;
	u64 status;
	int nr_bank, var_size;

	local_irq_save(flags);

	input = *this_cpu_ptr(hyperv_pcpu_input_arg);
	output = *this_cpu_ptr(hyperv_pcpu_output_arg);

	intr_desc = &input->interrupt_descriptor;
	memset(input, 0, sizeof(*input));
	input->partition_id = hv_current_partition_id;
	input->device_id = device_id.as_uint64;
	intr_desc->interrupt_type = HV_X64_INTERRUPT_TYPE_FIXED;
	intr_desc->vector_count = 1;
	intr_desc->target.vector = vector;

	if (level)
		intr_desc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_LEVEL;
	else
		intr_desc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_EDGE;

	intr_desc->target.vp_set.valid_bank_mask = 0;
	intr_desc->target.vp_set.format = HV_GENERIC_SET_SPARSE_4K;
	nr_bank = cpumask_to_vpset(&(intr_desc->target.vp_set), cpumask_of(cpu));
	if (nr_bank < 0) {
		local_irq_restore(flags);
		pr_err("%s: unable to generate VP set\n", __func__);
		return -EINVAL;
	}
	intr_desc->target.flags = HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET;

	/*
	 * Variable-sized hypercall: the variable-sized portion starts at
	 * vp_set.valid_bank_mask (so vp_set.format does not count towards
	 * var_size, but vp_set.valid_bank_mask does).
	 */
	var_size = nr_bank + 1;
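	/*
	 * Example (illustrative): targeting a single CPU whose VP index
	 * falls within one 64-bit bank of the sparse set gives
	 * nr_bank == 1, hence var_size == 2 (valid_bank_mask plus that
	 * one bank).
	 */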

	status = hv_do_rep_hypercall(HVCALL_MAP_DEVICE_INTERRUPT, 0, var_size,
			input, output);
	*entry = output->interrupt_entry;

	local_irq_restore(flags);

	if (!hv_result_success(status))
		pr_err("%s: hypercall failed, status %lld\n", __func__, status);

	return hv_result(status);
}

static int hv_unmap_interrupt(u64 id, struct hv_interrupt_entry *old_entry)
{
	unsigned long flags;
	struct hv_input_unmap_device_interrupt *input;
	struct hv_interrupt_entry *intr_entry;
	u64 status;

	local_irq_save(flags);
	input = *this_cpu_ptr(hyperv_pcpu_input_arg);

	memset(input, 0, sizeof(*input));
	intr_entry = &input->interrupt_entry;
	input->partition_id = hv_current_partition_id;
	input->device_id = id;
	*intr_entry = *old_entry;

	status = hv_do_hypercall(HVCALL_UNMAP_DEVICE_INTERRUPT, input, NULL);
	local_irq_restore(flags);

	return hv_result(status);
}

#ifdef CONFIG_PCI_MSI
struct rid_data {
	struct pci_dev *bridge;
	u32 rid;
};

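/*
 * pci_for_each_dma_alias() callback: remember the most recent alias
 * that crosses a bus boundary, together with the bridge that produced
 * it.  A device behind a PCI(-X) bridge, for example, is aliased to
 * the bridge's requester ID, and that is the ID the hypervisor must
 * be given.
 */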
static int get_rid_cb(struct pci_dev *pdev, u16 alias, void *data)
{
	struct rid_data *rd = data;
	u8 bus = PCI_BUS_NUM(rd->rid);

	if (pdev->bus->number != bus || PCI_BUS_NUM(alias) != bus) {
		rd->bridge = pdev;
		rd->rid = alias;
	}

	return 0;
}

static union hv_device_id hv_build_pci_dev_id(struct pci_dev *dev)
{
	union hv_device_id dev_id;
	struct rid_data data = {
		.bridge = NULL,
		.rid = PCI_DEVID(dev->bus->number, dev->devfn)
	};

	pci_for_each_dma_alias(dev, get_rid_cb, &data);

	dev_id.as_uint64 = 0;
	dev_id.device_type = HV_DEVICE_TYPE_PCI;
	dev_id.pci.segment = pci_domain_nr(dev->bus);

	dev_id.pci.bdf.bus = PCI_BUS_NUM(data.rid);
	dev_id.pci.bdf.device = PCI_SLOT(data.rid);
	dev_id.pci.bdf.function = PCI_FUNC(data.rid);
	dev_id.pci.source_shadow = HV_SOURCE_SHADOW_NONE;

	if (data.bridge) {
		int pos;

		/*
		 * Microsoft Hypervisor requires a bus range when the bridge is
		 * running in PCI-X mode.
		 *
		 * To distinguish a conventional PCI bridge from a PCI-X
		 * bridge, check the Secondary Bus Mode and Frequency bits in
		 * the bridge's PCI-X Secondary Status Register.  See PCI
		 * Express to PCI/PCI-X Bridge Specification Revision 1.0,
		 * Section 5.2.2.1.3.
		 *
		 * A value of zero means the bus is in conventional mode;
		 * otherwise it is in PCI-X mode.
		 */

		pos = pci_find_capability(data.bridge, PCI_CAP_ID_PCIX);
		if (pos) {
			u16 status;

			pci_read_config_word(data.bridge, pos +
					PCI_X_BRIDGE_SSTATUS, &status);

			if (status & PCI_X_SSTATUS_FREQ) {
				/* Non-zero, PCI-X mode */
				u8 sec_bus, sub_bus;

				dev_id.pci.source_shadow = HV_SOURCE_SHADOW_BRIDGE_BUS_RANGE;

				pci_read_config_byte(data.bridge, PCI_SECONDARY_BUS, &sec_bus);
				dev_id.pci.shadow_bus_range.secondary_bus = sec_bus;
				pci_read_config_byte(data.bridge, PCI_SUBORDINATE_BUS, &sub_bus);
				dev_id.pci.shadow_bus_range.subordinate_bus = sub_bus;
			}
		}
	}

	return dev_id;
}

static int hv_map_msi_interrupt(struct pci_dev *dev, int cpu, int vector,
				struct hv_interrupt_entry *entry)
{
	union hv_device_id device_id = hv_build_pci_dev_id(dev);

	return hv_map_interrupt(device_id, false, cpu, vector, entry);
}

static inline void entry_to_msi_msg(struct hv_interrupt_entry *entry, struct msi_msg *msg)
{
	/* High address is always 0 */
	msg->address_hi = 0;
	msg->address_lo = entry->msi_entry.address.as_uint32;
	msg->data = entry->msi_entry.data.as_uint32;
}

static int hv_unmap_msi_interrupt(struct pci_dev *dev, struct hv_interrupt_entry *old_entry);

static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
{
	struct msi_desc *msidesc;
	struct pci_dev *dev;
	struct hv_interrupt_entry out_entry, *stored_entry;
	struct irq_cfg *cfg = irqd_cfg(data);
	cpumask_t *affinity;
	int cpu;
	u64 status;

	msidesc = irq_data_get_msi_desc(data);
	dev = msi_desc_to_pci_dev(msidesc);

	if (!cfg) {
		pr_debug("%s: cfg is NULL\n", __func__);
		return;
	}

	affinity = irq_data_get_effective_affinity_mask(data);
	cpu = cpumask_first_and(affinity, cpu_online_mask);

	if (data->chip_data) {
		/*
		 * This interrupt is already mapped. Let's unmap first.
		 *
		 * We don't use retarget interrupt hypercalls here because
		 * Microsoft Hypervisor doesn't allow the root partition to
		 * change the vector or to specify VPs outside of the set
		 * that was initially used during mapping.
		 */
		stored_entry = data->chip_data;
		data->chip_data = NULL;

		status = hv_unmap_msi_interrupt(dev, stored_entry);

		kfree(stored_entry);

		if (status != HV_STATUS_SUCCESS) {
			pr_debug("%s: failed to unmap, status %lld\n", __func__, status);
			return;
		}
	}

	stored_entry = kzalloc(sizeof(*stored_entry), GFP_ATOMIC);
	if (!stored_entry) {
		pr_debug("%s: failed to allocate chip data\n", __func__);
		return;
	}

	status = hv_map_msi_interrupt(dev, cpu, cfg->vector, &out_entry);
	if (status != HV_STATUS_SUCCESS) {
		kfree(stored_entry);
		return;
	}

	*stored_entry = out_entry;
	data->chip_data = stored_entry;
	entry_to_msi_msg(&out_entry, msg);
}

static int hv_unmap_msi_interrupt(struct pci_dev *dev, struct hv_interrupt_entry *old_entry)
{
	return hv_unmap_interrupt(hv_build_pci_dev_id(dev).as_uint64, old_entry);
}

static void hv_teardown_msi_irq_common(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
{
	u64 status;
	struct hv_interrupt_entry old_entry;
	struct irq_desc *desc;
	struct irq_data *data;
	struct msi_msg msg;

	desc = irq_to_desc(irq);
	if (!desc) {
		pr_debug("%s: no irq desc\n", __func__);
		return;
	}

	data = &desc->irq_data;

	if (!data->chip_data) {
		pr_debug("%s: no chip data!\n", __func__);
		return;
	}

	old_entry = *(struct hv_interrupt_entry *)data->chip_data;
	entry_to_msi_msg(&old_entry, &msg);

	kfree(data->chip_data);
	data->chip_data = NULL;

	status = hv_unmap_msi_interrupt(dev, &old_entry);
	if (status != HV_STATUS_SUCCESS)
		pr_err("%s: hypercall failed, status %lld\n", __func__, status);
}

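/*
 * Override for msi_domain_ops::domain_free_irqs: tear down the
 * hypervisor mapping for every MSI vector on the device before the
 * irqs themselves are freed.
 */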
static void hv_msi_domain_free_irqs(struct irq_domain *domain, struct device *dev)
{
	int i;
	struct msi_desc *entry;
	struct pci_dev *pdev;

	if (WARN_ON_ONCE(!dev_is_pci(dev)))
		return;

	pdev = to_pci_dev(dev);

	for_each_pci_msi_entry(entry, pdev) {
		if (entry->irq) {
			for (i = 0; i < entry->nvec_used; i++) {
				hv_teardown_msi_irq_common(pdev, entry, entry->irq + i);
				irq_domain_free_irqs(entry->irq + i, 1);
			}
		}
	}
}

/*
 * IRQ Chip for MSI PCI/PCI-X/PCI-Express devices, which implement
 * the MSI or MSI-X capability structure.
 */
static struct irq_chip hv_pci_msi_controller = {
	.name			= "HV-PCI-MSI",
	.irq_unmask		= pci_msi_unmask_irq,
	.irq_mask		= pci_msi_mask_irq,
	.irq_ack		= irq_chip_ack_parent,
	.irq_retrigger		= irq_chip_retrigger_hierarchy,
	.irq_compose_msi_msg	= hv_irq_compose_msi_msg,
	.irq_set_affinity	= msi_domain_set_affinity,
	.flags			= IRQCHIP_SKIP_SET_WAKE,
};

static struct msi_domain_ops pci_msi_domain_ops = {
	.domain_free_irqs	= hv_msi_domain_free_irqs,
	.msi_prepare		= pci_msi_prepare,
};

static struct msi_domain_info hv_pci_msi_domain_info = {
	.flags		= MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
			  MSI_FLAG_PCI_MSIX,
	.ops		= &pci_msi_domain_ops,
	.chip		= &hv_pci_msi_controller,
	.handler	= handle_edge_irq,
	.handler_name	= "edge",
};
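/*
 * Create the MSI irq domain for PCI devices.  On Hyper-V root
 * partitions this is expected to be installed as the architecture's
 * PCI/MSI domain constructor (x86_init.irqs.create_pci_msi_domain)
 * during platform setup, so that PCI MSI/MSI-X allocations go through
 * the hypercall-based irq chip above.
 */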
struct irq_domain * __init hv_create_pci_msi_domain(void)
{
	struct irq_domain *d = NULL;
	struct fwnode_handle *fn;

	fn = irq_domain_alloc_named_fwnode("HV-PCI-MSI");
	if (fn)
		d = pci_msi_create_irq_domain(fn, &hv_pci_msi_domain_info, x86_vector_domain);

	/* No point in going further if we can't get an irq domain */
	BUG_ON(!d);

	return d;
}

#endif /* CONFIG_PCI_MSI */
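/*
 * IO-APIC map/unmap helpers, exported for other Hyper-V root-partition
 * code (e.g. the Hyper-V interrupt-remapping driver) that needs to
 * route IO-APIC interrupts through the hypervisor.
 */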
int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry)
{
	union hv_device_id device_id;

	device_id.as_uint64 = 0;
	device_id.device_type = HV_DEVICE_TYPE_IOAPIC;
	device_id.ioapic.ioapic_id = (u8)ioapic_id;

	return hv_unmap_interrupt(device_id.as_uint64, entry);
}
EXPORT_SYMBOL_GPL(hv_unmap_ioapic_interrupt);

int hv_map_ioapic_interrupt(int ioapic_id, bool level, int cpu, int vector,
		struct hv_interrupt_entry *entry)
{
	union hv_device_id device_id;

	device_id.as_uint64 = 0;
	device_id.device_type = HV_DEVICE_TYPE_IOAPIC;
	device_id.ioapic.ioapic_id = (u8)ioapic_id;

	return hv_map_interrupt(device_id, level, cpu, vector, entry);
}
EXPORT_SYMBOL_GPL(hv_map_ioapic_interrupt);