/*
 * Paravirtual PCI front-end for Microsoft Hyper-V virtual machines.
 *
 * This driver exposes devices that the host has passed through to the
 * guest ("Discrete Device Assignment") as functions on a synthetic PCI
 * bus. Configuration space access and MSI/MSI-X setup are tunneled to
 * the host through VMBus messages.
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/semaphore.h>
#include <linux/irqdomain.h>
#include <asm/irqdomain.h>
#include <asm/apic.h>
#include <linux/msi.h>
#include <linux/hyperv.h>
#include <asm/mshyperv.h>

/*
 * Protocol versions. The low word is the minor version, the high word
 * the major version.
 */
#define PCI_MAKE_VERSION(major, minor) ((u32)(((major) << 16) | (minor)))
#define PCI_MAJOR_VERSION(version) ((u32)(version) >> 16)
#define PCI_MINOR_VERSION(version) ((u32)(version) & 0xffff)

enum {
	PCI_PROTOCOL_VERSION_1_1 = PCI_MAKE_VERSION(1, 1),
	PCI_PROTOCOL_VERSION_CURRENT = PCI_PROTOCOL_VERSION_1_1
};

#define PCI_CONFIG_MMIO_LENGTH	0x2000
#define CFG_PAGE_OFFSET 0x1000
#define CFG_PAGE_SIZE (PCI_CONFIG_MMIO_LENGTH - CFG_PAGE_OFFSET)

#define MAX_SUPPORTED_MSI_MESSAGES 0x400

/*
 * Message Types
 */
enum pci_message_type {
	/*
	 * Version 1.1
	 */
	PCI_MESSAGE_BASE                = 0x42490000,
	PCI_BUS_RELATIONS               = PCI_MESSAGE_BASE + 0,
	PCI_QUERY_BUS_RELATIONS         = PCI_MESSAGE_BASE + 1,
	PCI_POWER_STATE_CHANGE          = PCI_MESSAGE_BASE + 4,
	PCI_QUERY_RESOURCE_REQUIREMENTS = PCI_MESSAGE_BASE + 5,
	PCI_QUERY_RESOURCE_RESOURCES    = PCI_MESSAGE_BASE + 6,
	PCI_BUS_D0ENTRY                 = PCI_MESSAGE_BASE + 7,
	PCI_BUS_D0EXIT                  = PCI_MESSAGE_BASE + 8,
	PCI_READ_BLOCK                  = PCI_MESSAGE_BASE + 9,
	PCI_WRITE_BLOCK                 = PCI_MESSAGE_BASE + 0xA,
	PCI_EJECT                       = PCI_MESSAGE_BASE + 0xB,
	PCI_QUERY_STOP                  = PCI_MESSAGE_BASE + 0xC,
	PCI_REENABLE                    = PCI_MESSAGE_BASE + 0xD,
	PCI_QUERY_STOP_FAILED           = PCI_MESSAGE_BASE + 0xE,
	PCI_EJECTION_COMPLETE           = PCI_MESSAGE_BASE + 0xF,
	PCI_RESOURCES_ASSIGNED          = PCI_MESSAGE_BASE + 0x10,
	PCI_RESOURCES_RELEASED          = PCI_MESSAGE_BASE + 0x11,
	PCI_INVALIDATE_BLOCK            = PCI_MESSAGE_BASE + 0x12,
	PCI_QUERY_PROTOCOL_VERSION      = PCI_MESSAGE_BASE + 0x13,
	PCI_CREATE_INTERRUPT_MESSAGE    = PCI_MESSAGE_BASE + 0x14,
	PCI_DELETE_INTERRUPT_MESSAGE    = PCI_MESSAGE_BASE + 0x15,
	PCI_MESSAGE_MAXIMUM
};

/*
 * Structures defining the virtual PCI Express protocol.
 */

union pci_version {
	struct {
		u16 minor_version;
		u16 major_version;
	} parts;
	u32 version;
} __packed;

/*
 * Function numbers are 8-bits wide on Express, as interpreted through ARI,
 * which is all this driver does. This representation is the one used in
 * Windows, which is what is expected when sending this back and forth with
 * the Hyper-V parent partition.
 */
union win_slot_encoding {
	struct {
		u32	func:8;
		u32	reserved:24;
	} bits;
	u32 slot;
} __packed;

/*
 * Pretty much as defined in the PCI Specifications.
 */
struct pci_function_description {
	u16	v_id;	/* vendor ID */
	u16	d_id;	/* device ID */
	u8	rev;
	u8	prog_intf;
	u8	subclass;
	u8	base_class;
	u32	subsystem_id;
	union win_slot_encoding win_slot;
	u32	ser;	/* serial number */
} __packed;

/**
 * struct hv_msi_desc
 * @vector:		IDT entry
 * @delivery_mode:	As defined in Intel's Programmer's
 *			Reference Manual, Volume 3, Chapter 8.
 * @vector_count:	Number of contiguous entries in the
 *			Interrupt Descriptor Table that are
 *			occupied by this Message-Signaled
 *			Interrupt.
 * @reserved:		Empty space
 * @cpu_mask:		All the target virtual processors.
 */
struct hv_msi_desc {
	u8	vector;
	u8	delivery_mode;
	u16	vector_count;
	u32	reserved;
	u64	cpu_mask;
} __packed;

/**
 * struct tran_int_desc
 * @reserved:		unused, padding
 * @vector_count:	same as in hv_msi_desc
 * @data:		The data value the device writes to generate this
 *			message-signaled interrupt
 * @address:		The address to which that data value is written
 */
struct tran_int_desc {
	u16	reserved;
	u16	vector_count;
	u32	data;
	u64	address;
} __packed;

/*
 * A generic message format for virtual PCI.
 * Specific message formats are defined later in the file.
 */
struct pci_message {
	u32 type;
} __packed;

struct pci_child_message {
	struct pci_message message_type;
	union win_slot_encoding wslot;
} __packed;

struct pci_incoming_message {
	struct vmpacket_descriptor hdr;
	struct pci_message message_type;
} __packed;

struct pci_response {
	struct vmpacket_descriptor hdr;
	s32 status;			/* negative values are failures */
} __packed;

struct pci_packet {
	void (*completion_func)(void *context, struct pci_response *resp,
				int resp_packet_size);
	void *compl_ctxt;

	struct pci_message message[0];
};

/*
 * Specific message types supporting the PCI protocol.
 */

/*
 * Version negotiation message. Sent from the guest to the host.
 * The guest is free to try different versions until the host
 * accepts one.
 */
struct pci_version_request {
	struct pci_message message_type;
	enum pci_message_type protocol_version;
} __packed;

/*
 * Bus D0 Entry. This is sent from the guest to the host when the virtual
 * PCI bus is entering D0 (fully functional). mmio_base is the guest
 * physical address at which the bus expects to find its configuration
 * space mapped.
 */
struct pci_bus_d0_entry {
	struct pci_message message_type;
	u32 reserved;
	u64 mmio_base;
} __packed;

struct pci_bus_relations {
	struct pci_incoming_message incoming;
	u32 device_count;
	struct pci_function_description func[0];
} __packed;

struct pci_q_res_req_response {
	struct vmpacket_descriptor hdr;
	s32 status;			/* negative values are failures */
	u32 probed_bar[6];
} __packed;

struct pci_set_power {
	struct pci_message message_type;
	union win_slot_encoding wslot;
	u32 power_state;
	u32 reserved;
} __packed;

struct pci_set_power_response {
	struct vmpacket_descriptor hdr;
	s32 status;
	union win_slot_encoding wslot;
	u32 resultant_state;
	u32 reserved;
} __packed;

struct pci_resources_assigned {
	struct pci_message message_type;
	union win_slot_encoding wslot;
	u8 memory_range[0x14][6];	/* not used here */
	u32 msi_descriptors;
	u32 reserved[4];
} __packed;

struct pci_create_interrupt {
	struct pci_message message_type;
	union win_slot_encoding wslot;
	struct hv_msi_desc int_desc;
} __packed;

struct pci_create_int_response {
	struct pci_response response;
	u32 reserved;
	struct tran_int_desc int_desc;
} __packed;

struct pci_delete_interrupt {
	struct pci_message message_type;
	union win_slot_encoding wslot;
	struct tran_int_desc int_desc;
} __packed;

struct pci_dev_incoming {
	struct pci_incoming_message incoming;
	union win_slot_encoding wslot;
} __packed;

struct pci_eject_response {
	struct pci_message message_type;
	union win_slot_encoding wslot;
	u32 status;
} __packed;

static int pci_ring_size = (4 * PAGE_SIZE);

/*
 * Definitions for the interrupt-retargeting hypercall.
 */
#define HV_PARTITION_ID_SELF		((u64)-1)
#define HVCALL_RETARGET_INTERRUPT	0x7e

struct retarget_msi_interrupt {
	u64	partition_id;		/* use "self" */
	u64	device_id;
	u32	source;			/* 1 for MSI(-X) */
	u32	reserved1;
	u32	address;
	u32	data;
	u64	reserved2;
	u32	vector;
	u32	flags;
	u64	vp_mask;
} __packed;

/*
 * Driver specific state.
 */

enum hv_pcibus_state {
	hv_pcibus_init = 0,
	hv_pcibus_probed,
	hv_pcibus_installed,
	hv_pcibus_maximum
};

struct hv_pcibus_device {
	struct pci_sysdata sysdata;
	enum hv_pcibus_state state;
	atomic_t remove_lock;
	struct hv_device *hdev;
	resource_size_t low_mmio_space;
	resource_size_t high_mmio_space;
	struct resource *mem_config;
	struct resource *low_mmio_res;
	struct resource *high_mmio_res;
	struct completion *survey_event;
	struct completion remove_event;
	struct pci_bus *pci_bus;
	spinlock_t config_lock;	/* Avoid two threads writing index page */
	spinlock_t device_list_lock;	/* Protect lists below */
	void __iomem *cfg_addr;

	struct semaphore enum_sem;
	struct list_head resources_for_children;

	struct list_head children;
	struct list_head dr_list;

	struct msi_domain_info msi_info;
	struct msi_controller msi_chip;
	struct irq_domain *irq_domain;
	struct retarget_msi_interrupt retarget_msi_interrupt_params;
	spinlock_t retarget_msi_interrupt_lock;
};

/*
 * Tracks "Device Relations" messages from the host, which must be both
 * processed in order and deferred so that they don't run in the context
 * of the incoming packet callback.
 */
struct hv_dr_work {
	struct work_struct wrk;
	struct hv_pcibus_device *bus;
};

struct hv_dr_state {
	struct list_head list_entry;
	u32 device_count;
	struct pci_function_description func[0];
};

enum hv_pcichild_state {
	hv_pcichild_init = 0,
	hv_pcichild_requirements,
	hv_pcichild_resourced,
	hv_pcichild_ejecting,
	hv_pcichild_maximum
};

enum hv_pcidev_ref_reason {
	hv_pcidev_ref_invalid = 0,
	hv_pcidev_ref_initial,
	hv_pcidev_ref_by_slot,
	hv_pcidev_ref_packet,
	hv_pcidev_ref_pnp,
	hv_pcidev_ref_childlist,
	hv_pcidev_irqdata,
	hv_pcidev_ref_max
};

struct hv_pci_dev {
	/* List protected by pci_rescan_remove_lock */
	struct list_head list_entry;
	atomic_t refs;
	enum hv_pcichild_state state;
	struct pci_function_description desc;
	bool reported_missing;
	struct hv_pcibus_device *hbus;
	struct work_struct wrk;

	/*
	 * What would be observed if one wrote 0xFFFFFFFF to a BAR and then
	 * read it back, for each of the BAR offsets within config space.
	 */
	u32 probed_bar[6];
};

struct hv_pci_compl {
	struct completion host_event;
	s32 completion_status;
};
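
/**
 * hv_pci_generic_compl() - Invoked for a completion packet
 * @context:		Set up by the sender of the packet.
 * @resp:		The response packet
 * @resp_packet_size:	Size in bytes of the packet
 *
 * This function is used to trigger an event and report status
 * for any message for which the completion packet contains a
 * status and nothing else.
 */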
static void hv_pci_generic_compl(void *context, struct pci_response *resp,
				 int resp_packet_size)
{
	struct hv_pci_compl *comp_pkt = context;

	if (resp_packet_size >= offsetofend(struct pci_response, status))
		comp_pkt->completion_status = resp->status;
	else
		comp_pkt->completion_status = -1;

	complete(&comp_pkt->host_event);
}

static struct hv_pci_dev *get_pcichild_wslot(struct hv_pcibus_device *hbus,
					     u32 wslot);
static void get_pcichild(struct hv_pci_dev *hv_pcidev,
			 enum hv_pcidev_ref_reason reason);
static void put_pcichild(struct hv_pci_dev *hv_pcidev,
			 enum hv_pcidev_ref_reason reason);

static void get_hvpcibus(struct hv_pcibus_device *hv_pcibus);
static void put_hvpcibus(struct hv_pcibus_device *hv_pcibus);
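
/**
 * devfn_to_wslot() - Convert from Linux PCI slot to Windows
 * @devfn:	The Linux representation of PCI slot
 *
 * Windows uses a slightly different representation of PCI slot.
 *
 * Return: The Windows representation
 */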
static u32 devfn_to_wslot(int devfn)
{
	union win_slot_encoding wslot;

	wslot.slot = 0;
	wslot.bits.func = PCI_SLOT(devfn) | (PCI_FUNC(devfn) << 5);

	return wslot.slot;
}
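
/**
 * wslot_to_devfn() - Convert from Windows PCI slot to Linux
 * @wslot:	The Windows representation of PCI slot
 *
 * Windows uses a slightly different representation of PCI slot.
 *
 * Return: The Linux representation
 */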
static int wslot_to_devfn(u32 wslot)
{
	union win_slot_encoding slot_no;

	slot_no.slot = wslot;
	return PCI_DEVFN(0, slot_no.bits.func);
}
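
/*
 * PCI Configuration Space for these root PCI buses is implemented as a pair
 * of pages in memory-mapped I/O space.  Writing to the first page chooses
 * the PCI function being written or read.  Once the first page has been
 * written to, the following page maps in the entire configuration space of
 * the function.
 */

/**
 * _hv_pcifront_read_config() - Internal PCI config read
 * @hpdev:	The PCI driver's representation of the device
 * @where:	Offset within config space
 * @size:	Size of the transfer
 * @val:	Pointer to the buffer in which to return the data
 */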
static void _hv_pcifront_read_config(struct hv_pci_dev *hpdev, int where,
				     int size, u32 *val)
{
	unsigned long flags;
	void __iomem *addr = hpdev->hbus->cfg_addr + CFG_PAGE_OFFSET + where;

	/*
	 * If the attempt is to read the IDs or the ROM, simulate that.
	 */
	if (where + size <= PCI_COMMAND) {
		memcpy(val, ((u8 *)&hpdev->desc.v_id) + where, size);
	} else if (where >= PCI_CLASS_REVISION && where + size <=
		   PCI_CACHE_LINE_SIZE) {
		memcpy(val, ((u8 *)&hpdev->desc.rev) + where -
		       PCI_CLASS_REVISION, size);
	} else if (where >= PCI_SUBSYSTEM_VENDOR_ID && where + size <=
		   PCI_ROM_ADDRESS) {
		memcpy(val, (u8 *)&hpdev->desc.subsystem_id + where -
		       PCI_SUBSYSTEM_VENDOR_ID, size);
	} else if (where >= PCI_ROM_ADDRESS && where + size <=
		   PCI_CAPABILITY_LIST) {
		/* ROM BARs are unimplemented */
		*val = 0;
	} else if (where >= PCI_INTERRUPT_LINE && where + size <=
		   PCI_INTERRUPT_PIN) {
		/*
		 * Interrupt Line and Interrupt PIN are hard-wired to zero
		 * because this front-end only supports message-signaled
		 * interrupts.
		 */
		*val = 0;
	} else if (where + size <= CFG_PAGE_SIZE) {
		spin_lock_irqsave(&hpdev->hbus->config_lock, flags);
		/* Choose the function to be read. (See comment above) */
		writel(hpdev->desc.win_slot.slot, hpdev->hbus->cfg_addr);
		/* Make sure the function was chosen before we start reading. */
		mb();
		/* Read from that function's config space. */
		switch (size) {
		case 1:
			*val = readb(addr);
			break;
		case 2:
			*val = readw(addr);
			break;
		default:
			*val = readl(addr);
			break;
		}
		/*
		 * Make sure the read was done before we release the spinlock
		 * allowing consecutive reads/writes.
		 */
		mb();
		spin_unlock_irqrestore(&hpdev->hbus->config_lock, flags);
	} else {
		dev_err(&hpdev->hbus->hdev->device,
			"Attempt to read beyond a function's config space.\n");
	}
}
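
/**
 * _hv_pcifront_write_config() - Internal PCI config write
 * @hpdev:	The PCI driver's representation of the device
 * @where:	Offset within config space
 * @size:	Size of the transfer
 * @val:	The data being transferred
 */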
static void _hv_pcifront_write_config(struct hv_pci_dev *hpdev, int where,
				      int size, u32 val)
{
	unsigned long flags;
	void __iomem *addr = hpdev->hbus->cfg_addr + CFG_PAGE_OFFSET + where;

	if (where >= PCI_SUBSYSTEM_VENDOR_ID &&
	    where + size <= PCI_CAPABILITY_LIST) {
		/* SSIDs and ROM BARs are read-only */
	} else if (where >= PCI_COMMAND && where + size <= CFG_PAGE_SIZE) {
		spin_lock_irqsave(&hpdev->hbus->config_lock, flags);
		/* Choose the function to be written. (See comment above) */
		writel(hpdev->desc.win_slot.slot, hpdev->hbus->cfg_addr);
		/* Make sure the function was chosen before we start writing. */
		wmb();
		/* Write to that function's config space. */
		switch (size) {
		case 1:
			writeb(val, addr);
			break;
		case 2:
			writew(val, addr);
			break;
		default:
			writel(val, addr);
			break;
		}
		/*
		 * Make sure the write was done before we release the spinlock
		 * allowing consecutive reads/writes.
		 */
		mb();
		spin_unlock_irqrestore(&hpdev->hbus->config_lock, flags);
	} else {
		dev_err(&hpdev->hbus->hdev->device,
			"Attempt to write beyond a function's config space.\n");
	}
}
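
/**
 * hv_pcifront_read_config() - Read configuration space
 * @bus:	PCI Bus structure
 * @devfn:	Device/function
 * @where:	Offset from base
 * @size:	Byte/word/dword
 * @val:	Value to be read
 *
 * Return: PCIBIOS_SUCCESSFUL on success
 *	   PCIBIOS_DEVICE_NOT_FOUND on failure
 */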
static int hv_pcifront_read_config(struct pci_bus *bus, unsigned int devfn,
				   int where, int size, u32 *val)
{
	struct hv_pcibus_device *hbus =
		container_of(bus->sysdata, struct hv_pcibus_device, sysdata);
	struct hv_pci_dev *hpdev;

	hpdev = get_pcichild_wslot(hbus, devfn_to_wslot(devfn));
	if (!hpdev)
		return PCIBIOS_DEVICE_NOT_FOUND;

	_hv_pcifront_read_config(hpdev, where, size, val);

	put_pcichild(hpdev, hv_pcidev_ref_by_slot);
	return PCIBIOS_SUCCESSFUL;
}
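
/**
 * hv_pcifront_write_config() - Write configuration space
 * @bus:	PCI Bus structure
 * @devfn:	Device/function
 * @where:	Offset from base
 * @size:	Byte/word/dword
 * @val:	Value to be written to device
 *
 * Return: PCIBIOS_SUCCESSFUL on success
 *	   PCIBIOS_DEVICE_NOT_FOUND on failure
 */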
static int hv_pcifront_write_config(struct pci_bus *bus, unsigned int devfn,
				    int where, int size, u32 val)
{
	struct hv_pcibus_device *hbus =
		container_of(bus->sysdata, struct hv_pcibus_device, sysdata);
	struct hv_pci_dev *hpdev;

	hpdev = get_pcichild_wslot(hbus, devfn_to_wslot(devfn));
	if (!hpdev)
		return PCIBIOS_DEVICE_NOT_FOUND;

	_hv_pcifront_write_config(hpdev, where, size, val);

	put_pcichild(hpdev, hv_pcidev_ref_by_slot);
	return PCIBIOS_SUCCESSFUL;
}

/* PCIe operations */
static struct pci_ops hv_pcifront_ops = {
	.read  = hv_pcifront_read_config,
	.write = hv_pcifront_write_config,
};

/* Interrupt management hooks */
static void hv_int_desc_free(struct hv_pci_dev *hpdev,
			     struct tran_int_desc *int_desc)
{
	struct pci_delete_interrupt *int_pkt;
	struct {
		struct pci_packet pkt;
		u8 buffer[sizeof(struct pci_delete_interrupt)];
	} ctxt;

	memset(&ctxt, 0, sizeof(ctxt));
	int_pkt = (struct pci_delete_interrupt *)&ctxt.pkt.message;
	int_pkt->message_type.type = PCI_DELETE_INTERRUPT_MESSAGE;
	int_pkt->wslot.slot = hpdev->desc.win_slot.slot;
	int_pkt->int_desc = *int_desc;
	vmbus_sendpacket(hpdev->hbus->hdev->channel, int_pkt, sizeof(*int_pkt),
			 (unsigned long)&ctxt.pkt, VM_PKT_DATA_INBAND, 0);
	kfree(int_desc);
}
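
/**
 * hv_msi_free() - Free the MSI.
 * @domain:	The interrupt domain pointer
 * @info:	Extra MSI-related context
 * @irq:	Identifies the IRQ.
 *
 * The Hyper-V parent partition and hypervisor are tracking the
 * messages that are in use, keeping the interrupt redirection
 * table up to date.  This callback sends a message that frees
 * the IRT entry and related tracking state.
 */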
static void hv_msi_free(struct irq_domain *domain, struct msi_domain_info *info,
			unsigned int irq)
{
	struct hv_pcibus_device *hbus;
	struct hv_pci_dev *hpdev;
	struct pci_dev *pdev;
	struct tran_int_desc *int_desc;
	struct irq_data *irq_data = irq_domain_get_irq_data(domain, irq);
	struct msi_desc *msi = irq_data_get_msi_desc(irq_data);

	pdev = msi_desc_to_pci_dev(msi);
	hbus = info->data;
	int_desc = irq_data_get_irq_chip_data(irq_data);
	if (!int_desc)
		return;

	irq_data->chip_data = NULL;
	hpdev = get_pcichild_wslot(hbus, devfn_to_wslot(pdev->devfn));
	if (!hpdev) {
		kfree(int_desc);
		return;
	}

	hv_int_desc_free(hpdev, int_desc);
	put_pcichild(hpdev, hv_pcidev_ref_by_slot);
}

static int hv_set_affinity(struct irq_data *data, const struct cpumask *dest,
			   bool force)
{
	struct irq_data *parent = data->parent_data;

	return parent->chip->irq_set_affinity(parent, dest, force);
}

static void hv_irq_mask(struct irq_data *data)
{
	pci_msi_mask_irq(data);
}
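
/**
 * hv_irq_unmask() - "Unmask" the IRQ by setting its current
 * affinity.
 * @data:	Describes the IRQ
 *
 * Build a new destination for the MSI and make a hypercall to
 * update the Interrupt Redirection Table. The "Device Logical ID"
 * is built out of this PCI bus's instance GUID and the function
 * number of the device.
 */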
static void hv_irq_unmask(struct irq_data *data)
{
	struct msi_desc *msi_desc = irq_data_get_msi_desc(data);
	struct irq_cfg *cfg = irqd_cfg(data);
	struct retarget_msi_interrupt *params;
	struct hv_pcibus_device *hbus;
	struct cpumask *dest;
	struct pci_bus *pbus;
	struct pci_dev *pdev;
	int cpu;
	unsigned long flags;

	dest = irq_data_get_affinity_mask(data);
	pdev = msi_desc_to_pci_dev(msi_desc);
	pbus = pdev->bus;
	hbus = container_of(pbus->sysdata, struct hv_pcibus_device, sysdata);

	spin_lock_irqsave(&hbus->retarget_msi_interrupt_lock, flags);

	params = &hbus->retarget_msi_interrupt_params;
	memset(params, 0, sizeof(*params));
	params->partition_id = HV_PARTITION_ID_SELF;
	params->source = 1; /* MSI(-X) */
	params->address = msi_desc->msg.address_lo;
	params->data = msi_desc->msg.data;
	params->device_id = (hbus->hdev->dev_instance.b[5] << 24) |
			   (hbus->hdev->dev_instance.b[4] << 16) |
			   (hbus->hdev->dev_instance.b[7] << 8) |
			   (hbus->hdev->dev_instance.b[6] & 0xf8) |
			   PCI_FUNC(pdev->devfn);
	params->vector = cfg->vector;

	for_each_cpu_and(cpu, dest, cpu_online_mask)
		params->vp_mask |= (1ULL << vmbus_cpu_number_to_vp_number(cpu));

	hv_do_hypercall(HVCALL_RETARGET_INTERRUPT, params, NULL);

	spin_unlock_irqrestore(&hbus->retarget_msi_interrupt_lock, flags);

	pci_msi_unmask_irq(data);
}

struct compose_comp_ctxt {
	struct hv_pci_compl comp_pkt;
	struct tran_int_desc int_desc;
};

static void hv_pci_compose_compl(void *context, struct pci_response *resp,
				 int resp_packet_size)
{
	struct compose_comp_ctxt *comp_pkt = context;
	struct pci_create_int_response *int_resp =
		(struct pci_create_int_response *)resp;

	comp_pkt->comp_pkt.completion_status = resp->status;
	comp_pkt->int_desc = int_resp->int_desc;
	complete(&comp_pkt->comp_pkt.host_event);
}
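
/**
 * hv_compose_msi_msg() - Supplies a valid MSI address/data
 * @data:	Everything about this MSI
 * @msg:	Buffer that is filled in by this function
 *
 * This function unpacks the IRQ looking for target CPU set, IDT
 * vector and mode and sends a message to the parent partition
 * asking for a mapping for that tuple in this partition.  The
 * response supplies a data value and address to which that data
 * should be written to trigger that interrupt.
 */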
static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
{
	struct irq_cfg *cfg = irqd_cfg(data);
	struct hv_pcibus_device *hbus;
	struct hv_pci_dev *hpdev;
	struct pci_bus *pbus;
	struct pci_dev *pdev;
	struct pci_create_interrupt *int_pkt;
	struct compose_comp_ctxt comp;
	struct tran_int_desc *int_desc;
	struct cpumask *affinity;
	struct {
		struct pci_packet pkt;
		u8 buffer[sizeof(struct pci_create_interrupt)];
	} ctxt;
	int cpu;
	int ret;

	pdev = msi_desc_to_pci_dev(irq_data_get_msi_desc(data));
	pbus = pdev->bus;
	hbus = container_of(pbus->sysdata, struct hv_pcibus_device, sysdata);
	hpdev = get_pcichild_wslot(hbus, devfn_to_wslot(pdev->devfn));
	if (!hpdev)
		goto return_null_message;

	/* Free any previous message that might have already been composed. */
	if (data->chip_data) {
		int_desc = data->chip_data;
		data->chip_data = NULL;
		hv_int_desc_free(hpdev, int_desc);
	}

	int_desc = kzalloc(sizeof(*int_desc), GFP_KERNEL);
	if (!int_desc)
		goto drop_reference;

	memset(&ctxt, 0, sizeof(ctxt));
	init_completion(&comp.comp_pkt.host_event);
	ctxt.pkt.completion_func = hv_pci_compose_compl;
	ctxt.pkt.compl_ctxt = &comp;
	int_pkt = (struct pci_create_interrupt *)&ctxt.pkt.message;
	int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE;
	int_pkt->wslot.slot = hpdev->desc.win_slot.slot;
	int_pkt->int_desc.vector = cfg->vector;
	int_pkt->int_desc.vector_count = 1;
	int_pkt->int_desc.delivery_mode =
		(apic->irq_delivery_mode == dest_LowestPrio) ? 1 : 0;

	/*
	 * This bit doesn't have to work on machines with more than 64
	 * processors because Hyper-V only supports 64 in a guest.
	 */
	affinity = irq_data_get_affinity_mask(data);
	for_each_cpu_and(cpu, affinity, cpu_online_mask) {
		int_pkt->int_desc.cpu_mask |=
			(1ULL << vmbus_cpu_number_to_vp_number(cpu));
	}

	ret = vmbus_sendpacket(hpdev->hbus->hdev->channel, int_pkt,
			       sizeof(*int_pkt), (unsigned long)&ctxt.pkt,
			       VM_PKT_DATA_INBAND,
			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
	if (ret)
		goto free_int_desc;

	wait_for_completion(&comp.comp_pkt.host_event);

	if (comp.comp_pkt.completion_status < 0) {
		dev_err(&hbus->hdev->device,
			"Request for interrupt failed: 0x%x",
			comp.comp_pkt.completion_status);
		goto free_int_desc;
	}

	/*
	 * Record the allocation so that it can be deleted when the
	 * interrupt is torn down or recomposed.
	 */
	*int_desc = comp.int_desc;
	data->chip_data = int_desc;

	/* Pass up the result. */
	msg->address_hi = comp.int_desc.address >> 32;
	msg->address_lo = comp.int_desc.address & 0xffffffff;
	msg->data = comp.int_desc.data;

	put_pcichild(hpdev, hv_pcidev_ref_by_slot);
	return;

free_int_desc:
	kfree(int_desc);
drop_reference:
	put_pcichild(hpdev, hv_pcidev_ref_by_slot);
return_null_message:
	msg->address_hi = 0;
	msg->address_lo = 0;
	msg->data = 0;
}

/* HW Interrupt Chip Descriptor */
static struct irq_chip hv_msi_irq_chip = {
	.name			= "Hyper-V PCIe MSI",
	.irq_compose_msi_msg	= hv_compose_msi_msg,
	.irq_set_affinity	= hv_set_affinity,
	.irq_ack		= irq_chip_ack_parent,
	.irq_mask		= hv_irq_mask,
	.irq_unmask		= hv_irq_unmask,
};

static irq_hw_number_t hv_msi_domain_ops_get_hwirq(struct msi_domain_info *info,
						   msi_alloc_info_t *arg)
{
	return arg->msi_hwirq;
}

static struct msi_domain_ops hv_msi_ops = {
	.get_hwirq	= hv_msi_domain_ops_get_hwirq,
	.msi_prepare	= pci_msi_prepare,
	.set_desc	= pci_msi_set_desc,
	.msi_free	= hv_msi_free,
};
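
/**
 * hv_pcie_init_irq_domain() - Initialize IRQ domain
 * @hbus:	The root PCI bus
 *
 * This function creates an IRQ domain which will be used for
 * interrupts from devices that have been passed through.  These
 * devices only support MSI and MSI-X, not line-based interrupts
 * or simulations of line-based interrupts through PCIe's
 * fabricated interrupts.
 *
 * Return: 0 on success, -errno on failure
 */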
static int hv_pcie_init_irq_domain(struct hv_pcibus_device *hbus)
{
	hbus->msi_info.chip = &hv_msi_irq_chip;
	hbus->msi_info.ops = &hv_msi_ops;
	hbus->msi_info.flags = (MSI_FLAG_USE_DEF_DOM_OPS |
		MSI_FLAG_USE_DEF_CHIP_OPS | MSI_FLAG_MULTI_PCI_MSI |
		MSI_FLAG_PCI_MSIX);
	hbus->msi_info.handler = handle_edge_irq;
	hbus->msi_info.handler_name = "edge";
	hbus->msi_info.data = hbus;
	hbus->irq_domain = pci_msi_create_irq_domain(hbus->sysdata.fwnode,
						     &hbus->msi_info,
						     x86_vector_domain);
	if (!hbus->irq_domain) {
		dev_err(&hbus->hdev->device,
			"Failed to build an MSI IRQ domain\n");
		return -ENODEV;
	}

	return 0;
}
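
/**
 * get_bar_size() - Get the address space consumed by a BAR
 * @bar_val:	Value that a BAR returned after -1 was written
 *		to it.
 *
 * This function returns the size of the BAR, rounded up to 1
 * page.  It has to be rounded up because the hypervisor's page
 * table entry that maps the BAR into the VM can't specify an
 * offset within a page.
 *
 * Return:	Size in bytes of the consumed MMIO space.
 */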
static u64 get_bar_size(u64 bar_val)
{
	return round_up((1 + ~(bar_val & PCI_BASE_ADDRESS_MEM_MASK)),
			PAGE_SIZE);
}
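
/**
 * survey_child_resources() - Total all MMIO requirements
 * @hbus:	Root PCI bus, as understood by this driver
 */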
static void survey_child_resources(struct hv_pcibus_device *hbus)
{
	struct list_head *iter;
	struct hv_pci_dev *hpdev;
	resource_size_t bar_size = 0;
	unsigned long flags;
	struct completion *event;
	u64 bar_val;
	int i;

	/* If nobody is waiting on the answer, don't compute it. */
	event = xchg(&hbus->survey_event, NULL);
	if (!event)
		return;

	/* If the answer has already been computed, don't go through it again. */
	if (hbus->low_mmio_space || hbus->high_mmio_space) {
		complete(event);
		return;
	}

	spin_lock_irqsave(&hbus->device_list_lock, flags);

	/*
	 * Due to an interesting quirk of the PCI spec, all memory regions
	 * for a child device are a power of 2 in size and aligned in memory,
	 * so it's sufficient to just add them up without tracking alignment.
	 */
	list_for_each(iter, &hbus->children) {
		hpdev = container_of(iter, struct hv_pci_dev, list_entry);
		for (i = 0; i < 6; i++) {
			if (hpdev->probed_bar[i] & PCI_BASE_ADDRESS_SPACE_IO)
				dev_err(&hbus->hdev->device,
					"There's an I/O BAR in this list!\n");

			if (hpdev->probed_bar[i] != 0) {
				/*
				 * A probed BAR has all the upper bits set that
				 * can be modified.
				 */
				bar_val = hpdev->probed_bar[i];
				if (bar_val & PCI_BASE_ADDRESS_MEM_TYPE_64)
					bar_val |=
					((u64)hpdev->probed_bar[++i] << 32);
				else
					bar_val |= 0xffffffff00000000ULL;

				bar_size = get_bar_size(bar_val);

				if (bar_val & PCI_BASE_ADDRESS_MEM_TYPE_64)
					hbus->high_mmio_space += bar_size;
				else
					hbus->low_mmio_space += bar_size;
			}
		}
	}

	spin_unlock_irqrestore(&hbus->device_list_lock, flags);
	complete(event);
}
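
/**
 * prepopulate_bars() - Fill in BARs with defaults
 * @hbus:	Root PCI bus, as understood by this driver
 *
 * The core PCI driver code seems much, much happier if the BARs
 * for a device have values upon first scan. So fill them in.
 * The algorithm below works down from large sizes to small,
 * attempting to pack the assignments optimally. The assumption,
 * enforced in other parts of the code, is that the beginning of
 * the memory-mapped I/O space will be aligned on the largest
 * BAR size.
 */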
static void prepopulate_bars(struct hv_pcibus_device *hbus)
{
	resource_size_t high_size = 0;
	resource_size_t low_size = 0;
	resource_size_t high_base = 0;
	resource_size_t low_base = 0;
	resource_size_t bar_size;
	struct hv_pci_dev *hpdev;
	struct list_head *iter;
	unsigned long flags;
	u64 bar_val;
	u32 command;
	bool high;
	int i;

	if (hbus->low_mmio_space) {
		low_size = 1ULL << (63 - __builtin_clzll(hbus->low_mmio_space));
		low_base = hbus->low_mmio_res->start;
	}

	if (hbus->high_mmio_space) {
		high_size = 1ULL <<
			(63 - __builtin_clzll(hbus->high_mmio_space));
		high_base = hbus->high_mmio_res->start;
	}

	spin_lock_irqsave(&hbus->device_list_lock, flags);

	/* Pick addresses in order, from the largest BAR size down. */
	do {
		list_for_each(iter, &hbus->children) {
			hpdev = container_of(iter, struct hv_pci_dev,
					     list_entry);
			for (i = 0; i < 6; i++) {
				bar_val = hpdev->probed_bar[i];
				if (bar_val == 0)
					continue;
				high = bar_val & PCI_BASE_ADDRESS_MEM_TYPE_64;
				if (high) {
					bar_val |=
						((u64)hpdev->probed_bar[i + 1]
						 << 32);
				} else {
					bar_val |= 0xffffffffULL << 32;
				}
				bar_size = get_bar_size(bar_val);
				if (high) {
					if (high_size != bar_size) {
						i++;
						continue;
					}
					_hv_pcifront_write_config(hpdev,
						PCI_BASE_ADDRESS_0 + (4 * i),
						4,
						(u32)(high_base & 0xffffff00));
					i++;
					_hv_pcifront_write_config(hpdev,
						PCI_BASE_ADDRESS_0 + (4 * i),
						4, (u32)(high_base >> 32));
					high_base += bar_size;
				} else {
					if (low_size != bar_size)
						continue;
					_hv_pcifront_write_config(hpdev,
						PCI_BASE_ADDRESS_0 + (4 * i),
						4,
						(u32)(low_base & 0xffffff00));
					low_base += bar_size;
				}
			}
			if (high_size <= 1 && low_size <= 1) {
				/* Set the memory enable bit. */
				_hv_pcifront_read_config(hpdev, PCI_COMMAND, 2,
							 &command);
				command |= PCI_COMMAND_MEMORY;
				_hv_pcifront_write_config(hpdev, PCI_COMMAND, 2,
							  command);
				break;
			}
		}

		high_size >>= 1;
		low_size >>= 1;
	} while (high_size || low_size);

	spin_unlock_irqrestore(&hbus->device_list_lock, flags);
}
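
/**
 * create_root_hv_pci_bus() - Expose a new root PCI bus
 * @hbus:	Root PCI bus, as understood by this driver
 *
 * Return: 0 on success, -errno on failure
 */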
static int create_root_hv_pci_bus(struct hv_pcibus_device *hbus)
{
	/* Register the device */
	hbus->pci_bus = pci_create_root_bus(&hbus->hdev->device,
					    0, /* bus number is always zero */
					    &hv_pcifront_ops,
					    &hbus->sysdata,
					    &hbus->resources_for_children);
	if (!hbus->pci_bus)
		return -ENODEV;

	hbus->pci_bus->msi = &hbus->msi_chip;
	hbus->pci_bus->msi->dev = &hbus->hdev->device;

	pci_scan_child_bus(hbus->pci_bus);
	pci_bus_assign_resources(hbus->pci_bus);
	pci_bus_add_devices(hbus->pci_bus);
	hbus->state = hv_pcibus_installed;
	return 0;
}

struct q_res_req_compl {
	struct completion host_event;
	struct hv_pci_dev *hpdev;
};
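
/**
 * q_resource_requirements() - Query Resource Requirements
 * @context:		The completion context.
 * @resp:		The response that came from the host.
 * @resp_packet_size:	The size in bytes of resp.
 *
 * This function is invoked on completion of a Query Resource
 * Requirements packet.
 */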
static void q_resource_requirements(void *context, struct pci_response *resp,
				    int resp_packet_size)
{
	struct q_res_req_compl *completion = context;
	struct pci_q_res_req_response *q_res_req =
		(struct pci_q_res_req_response *)resp;
	int i;

	if (resp->status < 0) {
		dev_err(&completion->hpdev->hbus->hdev->device,
			"query resource requirements failed: %x\n",
			resp->status);
	} else {
		for (i = 0; i < 6; i++) {
			completion->hpdev->probed_bar[i] =
				q_res_req->probed_bar[i];
		}
	}

	complete(&completion->host_event);
}

static void get_pcichild(struct hv_pci_dev *hpdev,
			 enum hv_pcidev_ref_reason reason)
{
	atomic_inc(&hpdev->refs);
}

static void put_pcichild(struct hv_pci_dev *hpdev,
			 enum hv_pcidev_ref_reason reason)
{
	if (atomic_dec_and_test(&hpdev->refs))
		kfree(hpdev);
}
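
/**
 * new_pcichild_device() - Create a new child device
 * @hbus:	The internal struct tracking this root PCI bus.
 * @desc:	The information supplied so far from the host
 *		about the device.
 *
 * This function creates the tracking structure for a new child
 * device and kicks off the process of figuring out what it is.
 *
 * Return: Pointer to the new tracking struct
 */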
static struct hv_pci_dev *new_pcichild_device(struct hv_pcibus_device *hbus,
					      struct pci_function_description *desc)
{
	struct hv_pci_dev *hpdev;
	struct pci_child_message *res_req;
	struct q_res_req_compl comp_pkt;
	struct {
		struct pci_packet init_packet;
		u8 buffer[sizeof(struct pci_child_message)];
	} pkt;
	unsigned long flags;
	int ret;

	hpdev = kzalloc(sizeof(*hpdev), GFP_ATOMIC);
	if (!hpdev)
		return NULL;

	hpdev->hbus = hbus;

	memset(&pkt, 0, sizeof(pkt));
	init_completion(&comp_pkt.host_event);
	comp_pkt.hpdev = hpdev;
	pkt.init_packet.compl_ctxt = &comp_pkt;
	pkt.init_packet.completion_func = q_resource_requirements;
	res_req = (struct pci_child_message *)&pkt.init_packet.message;
	res_req->message_type.type = PCI_QUERY_RESOURCE_REQUIREMENTS;
	res_req->wslot.slot = desc->win_slot.slot;

	ret = vmbus_sendpacket(hbus->hdev->channel, res_req,
			       sizeof(struct pci_child_message),
			       (unsigned long)&pkt.init_packet,
			       VM_PKT_DATA_INBAND,
			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
	if (ret)
		goto error;

	wait_for_completion(&comp_pkt.host_event);

	hpdev->desc = *desc;
	get_pcichild(hpdev, hv_pcidev_ref_initial);
	get_pcichild(hpdev, hv_pcidev_ref_childlist);
	spin_lock_irqsave(&hbus->device_list_lock, flags);
	list_add_tail(&hpdev->list_entry, &hbus->children);
	spin_unlock_irqrestore(&hbus->device_list_lock, flags);
	return hpdev;

error:
	kfree(hpdev);
	return NULL;
}
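
/**
 * get_pcichild_wslot() - Find device from slot
 * @hbus:	Root PCI bus, as understood by this driver
 * @wslot:	Location on the bus
 *
 * This function looks up a PCI device and returns the internal
 * representation of it.  It acquires a reference on it, so that
 * the device won't be deleted while somebody is using it.  The
 * caller is responsible for calling put_pcichild() to release
 * this reference.
 *
 * Return:	Internal representation of a PCI device
 */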
static struct hv_pci_dev *get_pcichild_wslot(struct hv_pcibus_device *hbus,
					     u32 wslot)
{
	unsigned long flags;
	struct hv_pci_dev *iter, *hpdev = NULL;

	spin_lock_irqsave(&hbus->device_list_lock, flags);
	list_for_each_entry(iter, &hbus->children, list_entry) {
		if (iter->desc.win_slot.slot == wslot) {
			hpdev = iter;
			get_pcichild(hpdev, hv_pcidev_ref_by_slot);
			break;
		}
	}
	spin_unlock_irqrestore(&hbus->device_list_lock, flags);

	return hpdev;
}
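
/**
 * pci_devices_present_work() - Handle new list of child devices
 * @work:	Work struct embedded in struct hv_dr_work
 *
 * "Bus Relations" is the Hyper-V parent partition's way of reporting
 * which devices are present on the bus.  A complete list arrives with
 * every message, so devices that have disappeared since the last list
 * must be inferred.  Because the host can send these messages faster
 * than they can be processed, only the most recently queued list is
 * acted on; intermediate states can safely be skipped.
 */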
static void pci_devices_present_work(struct work_struct *work)
{
	u32 child_no;
	bool found;
	struct list_head *iter;
	struct pci_function_description *new_desc;
	struct hv_pci_dev *hpdev;
	struct hv_pcibus_device *hbus;
	struct list_head removed;
	struct hv_dr_work *dr_wrk;
	struct hv_dr_state *dr = NULL;
	unsigned long flags;

	dr_wrk = container_of(work, struct hv_dr_work, wrk);
	hbus = dr_wrk->bus;
	kfree(dr_wrk);

	INIT_LIST_HEAD(&removed);

	if (down_interruptible(&hbus->enum_sem)) {
		put_hvpcibus(hbus);
		return;
	}

	/* Pull this off the queue and process it if it was the last one. */
	spin_lock_irqsave(&hbus->device_list_lock, flags);
	while (!list_empty(&hbus->dr_list)) {
		dr = list_first_entry(&hbus->dr_list, struct hv_dr_state,
				      list_entry);
		list_del(&dr->list_entry);

		/* Throw this away if the list still has stuff in it. */
		if (!list_empty(&hbus->dr_list)) {
			kfree(dr);
			continue;
		}
	}
	spin_unlock_irqrestore(&hbus->device_list_lock, flags);

	if (!dr) {
		up(&hbus->enum_sem);
		put_hvpcibus(hbus);
		return;
	}

	/* First, mark all existing children as reported missing. */
	spin_lock_irqsave(&hbus->device_list_lock, flags);
	list_for_each(iter, &hbus->children) {
		hpdev = container_of(iter, struct hv_pci_dev,
				     list_entry);
		hpdev->reported_missing = true;
	}
	spin_unlock_irqrestore(&hbus->device_list_lock, flags);

	/* Next, add back any reported devices. */
	for (child_no = 0; child_no < dr->device_count; child_no++) {
		found = false;
		new_desc = &dr->func[child_no];

		spin_lock_irqsave(&hbus->device_list_lock, flags);
		list_for_each(iter, &hbus->children) {
			hpdev = container_of(iter, struct hv_pci_dev,
					     list_entry);
			if ((hpdev->desc.win_slot.slot ==
			     new_desc->win_slot.slot) &&
			    (hpdev->desc.v_id == new_desc->v_id) &&
			    (hpdev->desc.d_id == new_desc->d_id) &&
			    (hpdev->desc.ser == new_desc->ser)) {
				hpdev->reported_missing = false;
				found = true;
			}
		}
		spin_unlock_irqrestore(&hbus->device_list_lock, flags);

		if (!found) {
			hpdev = new_pcichild_device(hbus, new_desc);
			if (!hpdev)
				dev_err(&hbus->hdev->device,
					"couldn't record a child device.\n");
		}
	}

	/* Move missing children to a list on the stack. */
	spin_lock_irqsave(&hbus->device_list_lock, flags);
	do {
		found = false;
		list_for_each(iter, &hbus->children) {
			hpdev = container_of(iter, struct hv_pci_dev,
					     list_entry);
			if (hpdev->reported_missing) {
				found = true;
				put_pcichild(hpdev, hv_pcidev_ref_childlist);
				list_move_tail(&hpdev->list_entry, &removed);
				break;
			}
		}
	} while (found);
	spin_unlock_irqrestore(&hbus->device_list_lock, flags);

	/* Delete everything that should no longer exist. */
	while (!list_empty(&removed)) {
		hpdev = list_first_entry(&removed, struct hv_pci_dev,
					 list_entry);
		list_del(&hpdev->list_entry);
		put_pcichild(hpdev, hv_pcidev_ref_initial);
	}

	/* Tell the core to rescan the bus if it is already up. */
	if (hbus->state == hv_pcibus_installed) {
		pci_lock_rescan_remove();
		pci_scan_child_bus(hbus->pci_bus);
		pci_unlock_rescan_remove();
	} else {
		survey_child_resources(hbus);
	}

	up(&hbus->enum_sem);
	put_hvpcibus(hbus);
	kfree(dr);
}
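
/**
 * hv_pci_devices_present() - Handles list of new children
 * @hbus:	Root PCI bus, as understood by this driver
 * @relations:	Packet from host listing children
 *
 * This function is invoked whenever a new list of devices for
 * this bus appears.  It queues the list for deferred processing
 * by pci_devices_present_work().
 */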
static void hv_pci_devices_present(struct hv_pcibus_device *hbus,
				   struct pci_bus_relations *relations)
{
	struct hv_dr_state *dr;
	struct hv_dr_work *dr_wrk;
	unsigned long flags;

	dr_wrk = kzalloc(sizeof(*dr_wrk), GFP_NOWAIT);
	if (!dr_wrk)
		return;

	dr = kzalloc(offsetof(struct hv_dr_state, func) +
		     (sizeof(struct pci_function_description) *
		      (relations->device_count)), GFP_NOWAIT);
	if (!dr) {
		kfree(dr_wrk);
		return;
	}

	INIT_WORK(&dr_wrk->wrk, pci_devices_present_work);
	dr_wrk->bus = hbus;
	dr->device_count = relations->device_count;
	if (dr->device_count != 0) {
		memcpy(dr->func, relations->func,
		       sizeof(struct pci_function_description) *
		       dr->device_count);
	}

	spin_lock_irqsave(&hbus->device_list_lock, flags);
	list_add_tail(&dr->list_entry, &hbus->dr_list);
	spin_unlock_irqrestore(&hbus->device_list_lock, flags);

	get_hvpcibus(hbus);
	schedule_work(&dr_wrk->wrk);
}
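
/**
 * hv_eject_device_work() - Asynchronously handles ejection
 * @work:	Work struct embedded in internal device struct
 *
 * This function handles ejecting a device.  Windows will
 * attempt to gracefully eject a device, waiting 60 seconds to
 * hear back from the guest OS that this completed successfully.
 * If this timer expires, the device will be forcibly removed.
 */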
static void hv_eject_device_work(struct work_struct *work)
{
	struct pci_eject_response *ejct_pkt;
	struct hv_pci_dev *hpdev;
	struct pci_dev *pdev;
	unsigned long flags;
	int wslot;
	struct {
		struct pci_packet pkt;
		u8 buffer[sizeof(struct pci_eject_response)];
	} ctxt;

	hpdev = container_of(work, struct hv_pci_dev, wrk);

	if (hpdev->state != hv_pcichild_ejecting) {
		put_pcichild(hpdev, hv_pcidev_ref_pnp);
		return;
	}

	/*
	 * Ejection can come before or after the PCI bus has been set up, so
	 * attempt to find it and tear down the bus state, if it exists.  This
	 * must be done without constructs like pci_domain_nr(hbus->pci_bus)
	 * because hbus->pci_bus may not exist yet.
	 */
	wslot = wslot_to_devfn(hpdev->desc.win_slot.slot);
	pdev = pci_get_domain_bus_and_slot(hpdev->hbus->sysdata.domain, 0,
					   wslot);
	if (pdev) {
		pci_stop_and_remove_bus_device(pdev);
		pci_dev_put(pdev);
	}

	spin_lock_irqsave(&hpdev->hbus->device_list_lock, flags);
	list_del(&hpdev->list_entry);
	spin_unlock_irqrestore(&hpdev->hbus->device_list_lock, flags);

	memset(&ctxt, 0, sizeof(ctxt));
	ejct_pkt = (struct pci_eject_response *)&ctxt.pkt.message;
	ejct_pkt->message_type.type = PCI_EJECTION_COMPLETE;
	ejct_pkt->wslot.slot = hpdev->desc.win_slot.slot;
	vmbus_sendpacket(hpdev->hbus->hdev->channel, ejct_pkt,
			 sizeof(*ejct_pkt), (unsigned long)&ctxt.pkt,
			 VM_PKT_DATA_INBAND, 0);

	put_pcichild(hpdev, hv_pcidev_ref_childlist);
	put_pcichild(hpdev, hv_pcidev_ref_pnp);
	put_hvpcibus(hpdev->hbus);
}
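
/**
 * hv_pci_eject_device() - Handles device ejection
 * @hpdev:	Internal device tracking struct
 *
 * This function is invoked when an ejection packet arrives.  It
 * just schedules work so that we don't re-enter the packet
 * delivery path while handling the ejection.
 */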
static void hv_pci_eject_device(struct hv_pci_dev *hpdev)
{
	hpdev->state = hv_pcichild_ejecting;
	get_pcichild(hpdev, hv_pcidev_ref_pnp);
	INIT_WORK(&hpdev->wrk, hv_eject_device_work);
	get_hvpcibus(hpdev->hbus);
	schedule_work(&hpdev->wrk);
}
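
/**
 * hv_pci_onchannelcallback() - Handles incoming packets
 * @context:	Internal bus tracking struct
 *
 * This function is invoked whenever the host sends a packet to
 * this channel (which is private to this root PCI bus).
 */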
static void hv_pci_onchannelcallback(void *context)
{
	const int packet_size = 0x100;
	int ret;
	struct hv_pcibus_device *hbus = context;
	u32 bytes_recvd;
	u64 req_id;
	struct vmpacket_descriptor *desc;
	unsigned char *buffer;
	int bufferlen = packet_size;
	struct pci_packet *comp_packet;
	struct pci_response *response;
	struct pci_incoming_message *new_message;
	struct pci_bus_relations *bus_rel;
	struct pci_dev_incoming *dev_message;
	struct hv_pci_dev *hpdev;

	buffer = kmalloc(bufferlen, GFP_ATOMIC);
	if (!buffer)
		return;

	while (1) {
		ret = vmbus_recvpacket_raw(hbus->hdev->channel, buffer,
					   bufferlen, &bytes_recvd, &req_id);

		if (ret == -ENOBUFS) {
			kfree(buffer);
			/* Grow the buffer to fit the packet and retry. */
			bufferlen = bytes_recvd;
			buffer = kmalloc(bytes_recvd, GFP_ATOMIC);
			if (!buffer)
				return;
			continue;
		}

		/* Zero length indicates there are no more packets. */
		if (ret || !bytes_recvd)
			break;

		/*
		 * All incoming packets must be at least as large as a
		 * response.
		 */
		if (bytes_recvd <= sizeof(struct pci_response))
			continue;
		desc = (struct vmpacket_descriptor *)buffer;

		switch (desc->type) {
		case VM_PKT_COMP:

			/*
			 * The host is trusted, and thus it's safe to interpret
			 * this transaction ID as a pointer.
			 */
			comp_packet = (struct pci_packet *)req_id;
			response = (struct pci_response *)buffer;
			comp_packet->completion_func(comp_packet->compl_ctxt,
						     response,
						     bytes_recvd);
			break;

		case VM_PKT_DATA_INBAND:

			new_message = (struct pci_incoming_message *)buffer;
			switch (new_message->message_type.type) {
			case PCI_BUS_RELATIONS:

				bus_rel = (struct pci_bus_relations *)buffer;
				if (bytes_recvd <
				    offsetof(struct pci_bus_relations, func) +
				    (sizeof(struct pci_function_description) *
				     (bus_rel->device_count))) {
					dev_err(&hbus->hdev->device,
						"bus relations too small\n");
					break;
				}

				hv_pci_devices_present(hbus, bus_rel);
				break;

			case PCI_EJECT:

				dev_message = (struct pci_dev_incoming *)buffer;
				hpdev = get_pcichild_wslot(hbus,
						   dev_message->wslot.slot);
				if (hpdev) {
					hv_pci_eject_device(hpdev);
					put_pcichild(hpdev,
						     hv_pcidev_ref_by_slot);
				}
				break;

			default:
				dev_warn(&hbus->hdev->device,
					 "Unimplemented protocol message %x\n",
					 new_message->message_type.type);
				break;
			}
			break;

		default:
			dev_err(&hbus->hdev->device,
				"unhandled packet type %d, tid %llx len %d\n",
				desc->type, req_id, bytes_recvd);
			break;
		}
	}

	kfree(buffer);
}
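
/**
 * hv_pci_protocol_negotiation() - Set up protocol
 * @hdev:	VMBus's tracking struct for this root PCI bus
 *
 * Negotiate the protocol version with the host.  Only version
 * 1.1 is defined by this driver, so the probe fails if the host
 * cannot accept it.
 *
 * Return: 0 on success, -errno on failure
 */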
static int hv_pci_protocol_negotiation(struct hv_device *hdev)
{
	struct pci_version_request *version_req;
	struct hv_pci_compl comp_pkt;
	struct pci_packet *pkt;
	int ret;

	/*
	 * Tell the host which version of the protocol this driver speaks.
	 * A negative completion status means the host can't support it,
	 * which is fatal for the probe.
	 */
	pkt = kzalloc(sizeof(*pkt) + sizeof(*version_req), GFP_KERNEL);
	if (!pkt)
		return -ENOMEM;

	init_completion(&comp_pkt.host_event);
	pkt->completion_func = hv_pci_generic_compl;
	pkt->compl_ctxt = &comp_pkt;
	version_req = (struct pci_version_request *)&pkt->message;
	version_req->message_type.type = PCI_QUERY_PROTOCOL_VERSION;
	version_req->protocol_version = PCI_PROTOCOL_VERSION_CURRENT;

	ret = vmbus_sendpacket(hdev->channel, version_req,
			       sizeof(struct pci_version_request),
			       (unsigned long)pkt, VM_PKT_DATA_INBAND,
			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
	if (ret)
		goto exit;

	wait_for_completion(&comp_pkt.host_event);

	if (comp_pkt.completion_status < 0) {
		dev_err(&hdev->device,
			"PCI Pass-through VSP failed version request %x\n",
			comp_pkt.completion_status);
		ret = -EPROTO;
		goto exit;
	}

	ret = 0;

exit:
	kfree(pkt);
	return ret;
}
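
/**
 * hv_pci_free_bridge_windows() - Release memory regions for the
 * bus
 * @hbus:	Root PCI bus, as understood by this driver
 */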
static void hv_pci_free_bridge_windows(struct hv_pcibus_device *hbus)
{
	/*
	 * Set the resources back to the way they looked when they
	 * were allocated by marking them busy again.
	 */
	if (hbus->low_mmio_space && hbus->low_mmio_res) {
		hbus->low_mmio_res->flags |= IORESOURCE_BUSY;
		vmbus_free_mmio(hbus->low_mmio_res->start,
				resource_size(hbus->low_mmio_res));
	}

	if (hbus->high_mmio_space && hbus->high_mmio_res) {
		hbus->high_mmio_res->flags |= IORESOURCE_BUSY;
		vmbus_free_mmio(hbus->high_mmio_res->start,
				resource_size(hbus->high_mmio_res));
	}
}
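
/**
 * hv_pci_allocate_bridge_windows() - Allocate memory regions
 * for the bus
 * @hbus:	Root PCI bus, as understood by this driver
 *
 * The kernel's pnp layer doesn't set up bridge windows for this
 * synthetic bus, since it knows nothing about VMBus.  As a
 * workaround, ask VMBus to allocate MMIO space; VMBus knows
 * which ranges are appropriate from its own ACPI objects.  The
 * resulting resources are then modified to look like the bridge
 * windows the ACPI and pnp code would have created, so that the
 * code invoked when an endpoint PCI function driver calls
 * request_mem_region() or request_mem_region_exclusive() is
 * satisfied.
 *
 * Return: 0 on success, -errno on failure
 */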
static int hv_pci_allocate_bridge_windows(struct hv_pcibus_device *hbus)
{
	resource_size_t align;
	int ret;

	if (hbus->low_mmio_space) {
		align = 1ULL << (63 - __builtin_clzll(hbus->low_mmio_space));
		ret = vmbus_allocate_mmio(&hbus->low_mmio_res, hbus->hdev, 0,
					  (u64)(u32)0xffffffff,
					  hbus->low_mmio_space,
					  align, false);
		if (ret) {
			dev_err(&hbus->hdev->device,
				"Need %#llx of low MMIO space. Consider reconfiguring the VM.\n",
				hbus->low_mmio_space);
			return ret;
		}

		/* Modify this resource to become a bridge window. */
		hbus->low_mmio_res->flags |= IORESOURCE_WINDOW;
		hbus->low_mmio_res->flags &= ~IORESOURCE_BUSY;
		pci_add_resource(&hbus->resources_for_children,
				 hbus->low_mmio_res);
	}

	if (hbus->high_mmio_space) {
		align = 1ULL << (63 - __builtin_clzll(hbus->high_mmio_space));
		ret = vmbus_allocate_mmio(&hbus->high_mmio_res, hbus->hdev,
					  0x100000000, -1,
					  hbus->high_mmio_space, align,
					  false);
		if (ret) {
			dev_err(&hbus->hdev->device,
				"Need %#llx of high MMIO space. Consider reconfiguring the VM.\n",
				hbus->high_mmio_space);
			goto release_low_mmio;
		}

		/* Modify this resource to become a bridge window. */
		hbus->high_mmio_res->flags |= IORESOURCE_WINDOW;
		hbus->high_mmio_res->flags &= ~IORESOURCE_BUSY;
		pci_add_resource(&hbus->resources_for_children,
				 hbus->high_mmio_res);
	}

	return 0;

release_low_mmio:
	if (hbus->low_mmio_res) {
		vmbus_free_mmio(hbus->low_mmio_res->start,
				resource_size(hbus->low_mmio_res));
	}

	return ret;
}
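
/**
 * hv_allocate_config_window() - Find MMIO space for PCI Config
 * @hbus:	Root PCI bus, as understood by this driver
 *
 * This function claims memory-mapped I/O space for accessing
 * configuration space for the functions on this bus.
 *
 * Return: 0 on success, -errno on failure
 */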
static int hv_allocate_config_window(struct hv_pcibus_device *hbus)
{
	int ret;

	/*
	 * Set up a region of MMIO space to use for accessing configuration
	 * space.
	 */
	ret = vmbus_allocate_mmio(&hbus->mem_config, hbus->hdev, 0, -1,
				  PCI_CONFIG_MMIO_LENGTH, 0x1000, false);
	if (ret)
		return ret;

	/*
	 * This region is consumed internally rather than claimed by an
	 * endpoint driver, so mark it busy explicitly to keep other parts
	 * of the kernel from handing it out again.
	 */
	hbus->mem_config->flags |= IORESOURCE_BUSY;

	return 0;
}

static void hv_free_config_window(struct hv_pcibus_device *hbus)
{
	vmbus_free_mmio(hbus->mem_config->start, PCI_CONFIG_MMIO_LENGTH);
}
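
/**
 * hv_pci_enter_d0() - Bring the "bus" into the D0 power state
 * @hdev:	VMBus's tracking struct for this root PCI bus
 *
 * Return: 0 on success, -errno on failure
 */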
static int hv_pci_enter_d0(struct hv_device *hdev)
{
	struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
	struct pci_bus_d0_entry *d0_entry;
	struct hv_pci_compl comp_pkt;
	struct pci_packet *pkt;
	int ret;

	/*
	 * Tell the host that the bus is ready to use, and moved into the
	 * powered-on state.  This includes telling the host which region
	 * of memory-mapped I/O space has been chosen for configuration space
	 * access.
	 */
	pkt = kzalloc(sizeof(*pkt) + sizeof(*d0_entry), GFP_KERNEL);
	if (!pkt)
		return -ENOMEM;

	init_completion(&comp_pkt.host_event);
	pkt->completion_func = hv_pci_generic_compl;
	pkt->compl_ctxt = &comp_pkt;
	d0_entry = (struct pci_bus_d0_entry *)&pkt->message;
	d0_entry->message_type.type = PCI_BUS_D0ENTRY;
	d0_entry->mmio_base = hbus->mem_config->start;

	ret = vmbus_sendpacket(hdev->channel, d0_entry, sizeof(*d0_entry),
			       (unsigned long)pkt, VM_PKT_DATA_INBAND,
			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
	if (ret)
		goto exit;

	wait_for_completion(&comp_pkt.host_event);

	if (comp_pkt.completion_status < 0) {
		dev_err(&hdev->device,
			"PCI Pass-through VSP failed D0 Entry with status %x\n",
			comp_pkt.completion_status);
		ret = -EPROTO;
		goto exit;
	}

	ret = 0;

exit:
	kfree(pkt);
	return ret;
}
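
/**
 * hv_pci_query_relations() - Ask host to send list of child
 * devices
 * @hdev:	VMBus's tracking struct for this root PCI bus
 *
 * Return: 0 on success, -errno on failure
 */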
static int hv_pci_query_relations(struct hv_device *hdev)
{
	struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
	struct pci_message message;
	struct completion comp;
	int ret;

	/* Ask the host to send along the list of child devices */
	init_completion(&comp);
	if (cmpxchg(&hbus->survey_event, NULL, &comp))
		return -ENOTEMPTY;

	memset(&message, 0, sizeof(message));
	message.type = PCI_QUERY_BUS_RELATIONS;

	ret = vmbus_sendpacket(hdev->channel, &message, sizeof(message),
			       0, VM_PKT_DATA_INBAND, 0);
	if (ret)
		return ret;

	wait_for_completion(&comp);
	return 0;
}
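
/**
 * hv_send_resources_allocated() - Report local resource choices
 * @hdev:	VMBus's tracking struct for this root PCI bus
 *
 * The host OS is expecting to be sent a request as a message
 * which contains all the resources that the device will use.
 * The response contains those same resources, "translated"
 * which is to say, the values which should be used by the
 * hardware, when it delivers an interrupt.  (MMIO resources are
 * used in local terms.)  This is nice for Windows, and lines up
 * with the FDO/PDO split, which doesn't exist in Linux.  Linux
 * is deeply expecting to scan an emulated PCI configuration
 * space.  So this message is sent here only to drive the state
 * machine on the host forward.
 *
 * Return: 0 on success, -errno on failure
 */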
static int hv_send_resources_allocated(struct hv_device *hdev)
{
	struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
	struct pci_resources_assigned *res_assigned;
	struct hv_pci_compl comp_pkt;
	struct hv_pci_dev *hpdev;
	struct pci_packet *pkt;
	u32 wslot;
	int ret;

	pkt = kmalloc(sizeof(*pkt) + sizeof(*res_assigned), GFP_KERNEL);
	if (!pkt)
		return -ENOMEM;

	ret = 0;

	for (wslot = 0; wslot < 256; wslot++) {
		hpdev = get_pcichild_wslot(hbus, wslot);
		if (!hpdev)
			continue;

		memset(pkt, 0, sizeof(*pkt) + sizeof(*res_assigned));
		init_completion(&comp_pkt.host_event);
		pkt->completion_func = hv_pci_generic_compl;
		pkt->compl_ctxt = &comp_pkt;
		res_assigned = (struct pci_resources_assigned *)&pkt->message;
		res_assigned->message_type.type = PCI_RESOURCES_ASSIGNED;
		res_assigned->wslot.slot = hpdev->desc.win_slot.slot;

		put_pcichild(hpdev, hv_pcidev_ref_by_slot);

		ret = vmbus_sendpacket(
			hdev->channel, &pkt->message,
			sizeof(*res_assigned),
			(unsigned long)pkt,
			VM_PKT_DATA_INBAND,
			VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
		if (ret)
			break;

		wait_for_completion(&comp_pkt.host_event);

		if (comp_pkt.completion_status < 0) {
			ret = -EPROTO;
			dev_err(&hdev->device,
				"resource allocated returned 0x%x",
				comp_pkt.completion_status);
			break;
		}
	}

	kfree(pkt);
	return ret;
}
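
/**
 * hv_send_resources_released() - Report local resources
 * released
 * @hdev:	VMBus's tracking struct for this root PCI bus
 *
 * Return: 0 on success, -errno on failure
 */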
static int hv_send_resources_released(struct hv_device *hdev)
{
	struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
	struct pci_child_message pkt;
	struct hv_pci_dev *hpdev;
	u32 wslot;
	int ret;

	for (wslot = 0; wslot < 256; wslot++) {
		hpdev = get_pcichild_wslot(hbus, wslot);
		if (!hpdev)
			continue;

		memset(&pkt, 0, sizeof(pkt));
		pkt.message_type.type = PCI_RESOURCES_RELEASED;
		pkt.wslot.slot = hpdev->desc.win_slot.slot;

		put_pcichild(hpdev, hv_pcidev_ref_by_slot);

		ret = vmbus_sendpacket(hdev->channel, &pkt, sizeof(pkt), 0,
				       VM_PKT_DATA_INBAND, 0);
		if (ret)
			return ret;
	}

	return 0;
}

static void get_hvpcibus(struct hv_pcibus_device *hbus)
{
	atomic_inc(&hbus->remove_lock);
}

static void put_hvpcibus(struct hv_pcibus_device *hbus)
{
	if (atomic_dec_and_test(&hbus->remove_lock))
		complete(&hbus->remove_event);
}
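
/**
 * hv_pci_probe() - New VMBus channel probe, for a root PCI bus
 * @hdev:	VMBus's tracking struct for this root PCI bus
 * @dev_id:	Identifies the device itself
 *
 * Return: 0 on success, -errno on failure
 */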
static int hv_pci_probe(struct hv_device *hdev,
			const struct hv_vmbus_device_id *dev_id)
{
	struct hv_pcibus_device *hbus;
	int ret;

	hbus = kzalloc(sizeof(*hbus), GFP_KERNEL);
	if (!hbus)
		return -ENOMEM;

	/*
	 * The PCI bus "domain" is what is called "segment" in ACPI and
	 * other specs.  Pull it from the instance ID, to get something
	 * unique.  Bytes 8 and 9 are what is used in Windows guests, so
	 * do the same thing for consistency.  Note that, since this code
	 * only runs in a Hyper-V VM, Hyper-V can (and does) guarantee
	 * that the instance IDs, and therefore the domains derived from
	 * them, are unique.
	 */
	hbus->sysdata.domain = hdev->dev_instance.b[9] |
			       hdev->dev_instance.b[8] << 8;

	hbus->hdev = hdev;
	atomic_inc(&hbus->remove_lock);
	INIT_LIST_HEAD(&hbus->children);
	INIT_LIST_HEAD(&hbus->dr_list);
	INIT_LIST_HEAD(&hbus->resources_for_children);
	spin_lock_init(&hbus->config_lock);
	spin_lock_init(&hbus->device_list_lock);
	spin_lock_init(&hbus->retarget_msi_interrupt_lock);
	sema_init(&hbus->enum_sem, 1);
	init_completion(&hbus->remove_event);

	ret = vmbus_open(hdev->channel, pci_ring_size, pci_ring_size, NULL, 0,
			 hv_pci_onchannelcallback, hbus);
	if (ret)
		goto free_bus;

	hv_set_drvdata(hdev, hbus);

	ret = hv_pci_protocol_negotiation(hdev);
	if (ret)
		goto close;

	ret = hv_allocate_config_window(hbus);
	if (ret)
		goto close;

	hbus->cfg_addr = ioremap(hbus->mem_config->start,
				 PCI_CONFIG_MMIO_LENGTH);
	if (!hbus->cfg_addr) {
		dev_err(&hdev->device,
			"Unable to map a virtual address for config space\n");
		ret = -ENOMEM;
		goto free_config;
	}

	hbus->sysdata.fwnode = irq_domain_alloc_fwnode(hbus);
	if (!hbus->sysdata.fwnode) {
		ret = -ENOMEM;
		goto unmap;
	}

	ret = hv_pcie_init_irq_domain(hbus);
	if (ret)
		goto free_fwnode;

	ret = hv_pci_query_relations(hdev);
	if (ret)
		goto free_irq_domain;

	ret = hv_pci_enter_d0(hdev);
	if (ret)
		goto free_irq_domain;

	ret = hv_pci_allocate_bridge_windows(hbus);
	if (ret)
		goto free_irq_domain;

	ret = hv_send_resources_allocated(hdev);
	if (ret)
		goto free_windows;

	prepopulate_bars(hbus);

	hbus->state = hv_pcibus_probed;

	ret = create_root_hv_pci_bus(hbus);
	if (ret)
		goto free_windows;

	return 0;

free_windows:
	hv_pci_free_bridge_windows(hbus);
free_irq_domain:
	irq_domain_remove(hbus->irq_domain);
free_fwnode:
	irq_domain_free_fwnode(hbus->sysdata.fwnode);
unmap:
	iounmap(hbus->cfg_addr);
free_config:
	hv_free_config_window(hbus);
close:
	vmbus_close(hdev->channel);
free_bus:
	kfree(hbus);
	return ret;
}

static void hv_pci_bus_exit(struct hv_device *hdev)
{
	struct hv_pcibus_device *hbus = hv_get_drvdata(hdev);
	struct {
		struct pci_packet teardown_packet;
		u8 buffer[sizeof(struct pci_message)];
	} pkt;
	struct pci_bus_relations relations;
	struct hv_pci_compl comp_pkt;
	int ret;

	/*
	 * After the host sends the RESCIND_CHANNEL message, it doesn't
	 * access the per-channel ring buffer any longer.
	 */
	if (hdev->channel->rescind)
		return;

	/* Delete any children which might still exist. */
	memset(&relations, 0, sizeof(relations));
	hv_pci_devices_present(hbus, &relations);

	ret = hv_send_resources_released(hdev);
	if (ret)
		dev_err(&hdev->device,
			"Couldn't send resources released packet(s)\n");

	memset(&pkt.teardown_packet, 0, sizeof(pkt.teardown_packet));
	init_completion(&comp_pkt.host_event);
	pkt.teardown_packet.completion_func = hv_pci_generic_compl;
	pkt.teardown_packet.compl_ctxt = &comp_pkt;
	pkt.teardown_packet.message[0].type = PCI_BUS_D0EXIT;

	ret = vmbus_sendpacket(hdev->channel, &pkt.teardown_packet.message,
			       sizeof(struct pci_message),
			       (unsigned long)&pkt.teardown_packet,
			       VM_PKT_DATA_INBAND,
			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
	if (!ret)
		wait_for_completion_timeout(&comp_pkt.host_event, 10 * HZ);
}
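
/**
 * hv_pci_remove() - Remove routine for this VMBus channel
 * @hdev:	VMBus's tracking struct for this root PCI bus
 *
 * Return: 0 on success, -errno on failure
 */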
static int hv_pci_remove(struct hv_device *hdev)
{
	struct hv_pcibus_device *hbus;

	hbus = hv_get_drvdata(hdev);
	if (hbus->state == hv_pcibus_installed) {
		/* Remove the bus from PCI's point of view. */
		pci_lock_rescan_remove();
		pci_stop_root_bus(hbus->pci_bus);
		pci_remove_root_bus(hbus->pci_bus);
		pci_unlock_rescan_remove();
	}

	hv_pci_bus_exit(hdev);

	vmbus_close(hdev->channel);

	iounmap(hbus->cfg_addr);
	hv_free_config_window(hbus);
	pci_free_resource_list(&hbus->resources_for_children);
	hv_pci_free_bridge_windows(hbus);
	irq_domain_remove(hbus->irq_domain);
	irq_domain_free_fwnode(hbus->sysdata.fwnode);
	put_hvpcibus(hbus);
	wait_for_completion(&hbus->remove_event);
	kfree(hbus);
	return 0;
}

static const struct hv_vmbus_device_id hv_pci_id_table[] = {
	/* PCI Pass-through Class ID */
	{ HV_PCIE_GUID, },
	{ },
};

MODULE_DEVICE_TABLE(vmbus, hv_pci_id_table);

static struct hv_driver hv_pci_drv = {
	.name		= "hv_pci",
	.id_table	= hv_pci_id_table,
	.probe		= hv_pci_probe,
	.remove		= hv_pci_remove,
};

static void __exit exit_hv_pci_drv(void)
{
	vmbus_driver_unregister(&hv_pci_drv);
}

static int __init init_hv_pci_drv(void)
{
	return vmbus_driver_register(&hv_pci_drv);
}

module_init(init_hv_pci_drv);
module_exit(exit_hv_pci_drv);

MODULE_DESCRIPTION("Hyper-V PCI");
MODULE_LICENSE("GPL v2");