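/*
 * Support PCI/PCIe on PowerNV platforms
 */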
#undef DEBUG

#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/crash_dump.h>
#include <linux/delay.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/memblock.h>
#include <linux/irq.h>
#include <linux/io.h>
#include <linux/msi.h>
#include <linux/iommu.h>
#include <linux/rculist.h>
#include <linux/sizes.h>

#include <asm/sections.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
#include <asm/msi_bitmap.h>
#include <asm/ppc-pci.h>
#include <asm/opal.h>
#include <asm/iommu.h>
#include <asm/tce.h>
#include <asm/xics.h>
#include <asm/debugfs.h>
#include <asm/firmware.h>
#include <asm/pnv-pci.h>
#include <asm/mmzone.h>

#include <misc/cxl-base.h>

#include "powernv.h"
#include "pci.h"
#include "../../../../drivers/pci/pci.h"

#define PNV_IODA1_M64_NUM	16
#define PNV_IODA1_M64_SEGS	8
#define PNV_IODA1_DMA32_SEGSIZE	0x10000000

static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU_NVLINK",
					      "NPU_OCAPI" };

void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
		     const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;
	char pfix[32];

	va_start(args, fmt);

	vaf.fmt = fmt;
	vaf.va = &args;

	if (pe->flags & PNV_IODA_PE_DEV)
		strlcpy(pfix, dev_name(&pe->pdev->dev), sizeof(pfix));
	else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
		sprintf(pfix, "%04x:%02x ",
			pci_domain_nr(pe->pbus), pe->pbus->number);
#ifdef CONFIG_PCI_IOV
	else if (pe->flags & PNV_IODA_PE_VF)
		sprintf(pfix, "%04x:%02x:%2x.%d",
			pci_domain_nr(pe->parent_dev->bus),
			(pe->rid & 0xff00) >> 8,
			PCI_SLOT(pe->rid), PCI_FUNC(pe->rid));
#endif

	printk("%spci %s: [PE# %.2x] %pV",
	       level, pfix, pe->pe_number, &vaf);

	va_end(args);
}

static bool pnv_iommu_bypass_disabled __read_mostly;
static bool pci_reset_phbs __read_mostly;

static int __init iommu_setup(char *str)
{
	if (!str)
		return -EINVAL;

	while (*str) {
		if (!strncmp(str, "nobypass", 8)) {
			pnv_iommu_bypass_disabled = true;
			pr_info("PowerNV: IOMMU bypass window disabled.\n");
			break;
		}
		str += strcspn(str, ",");
		if (*str == ',')
			str++;
	}

	return 0;
}
early_param("iommu", iommu_setup);

static int __init pci_reset_phbs_setup(char *str)
{
	pci_reset_phbs = true;
	return 0;
}

early_param("ppc_pci_reset_phbs", pci_reset_phbs_setup);

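/*
 * Check whether a resource lives inside this PHB's M64 window. Note that
 * only the resource's start address is tested against the window bounds.
 */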
static inline bool pnv_pci_is_m64(struct pnv_phb *phb, struct resource *r)
{
	return (r->start >= phb->ioda.m64_base &&
		r->start < (phb->ioda.m64_base + phb->ioda.m64_size));
}

static inline bool pnv_pci_is_m64_flags(unsigned long resource_flags)
{
	unsigned long flags = (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH);

	return (resource_flags & flags) == flags;
}

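/*
 * Initialise a PE's tracking structure and clear any stale EEH freeze
 * state left behind by a previous owner of this PE number.
 */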
static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no)
{
	s64 rc;

	phb->ioda.pe_array[pe_no].phb = phb;
	phb->ioda.pe_array[pe_no].pe_number = pe_no;

	rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no,
				       OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
	if (rc != OPAL_SUCCESS && rc != OPAL_UNSUPPORTED)
		pr_warn("%s: Error %lld unfreezing PHB#%x-PE#%x\n",
			__func__, rc, phb->hose->global_number, pe_no);

	return &phb->ioda.pe_array[pe_no];
}

static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no)
{
	if (!(pe_no >= 0 && pe_no < phb->ioda.total_pe_num)) {
		pr_warn("%s: Invalid PE %x on PHB#%x\n",
			__func__, pe_no, phb->hose->global_number);
		return;
	}

	if (test_and_set_bit(pe_no, phb->ioda.pe_alloc))
		pr_debug("%s: PE %x was reserved on PHB#%x\n",
			 __func__, pe_no, phb->hose->global_number);

	pnv_ioda_init_pe(phb, pe_no);
}

static struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb)
{
	long pe;

	for (pe = phb->ioda.total_pe_num - 1; pe >= 0; pe--) {
		if (!test_and_set_bit(pe, phb->ioda.pe_alloc))
			return pnv_ioda_init_pe(phb, pe);
	}

	return NULL;
}

static void pnv_ioda_free_pe(struct pnv_ioda_pe *pe)
{
	struct pnv_phb *phb = pe->phb;
	unsigned int pe_num = pe->pe_number;

	WARN_ON(pe->pdev);
	WARN_ON(pe->npucomp);
	kfree(pe->npucomp);
	memset(pe, 0, sizeof(struct pnv_ioda_pe));
	clear_bit(pe_num, phb->ioda.pe_alloc);
}

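/*
 * Configure and enable the single M64 BAR that backs the whole M64
 * window on an IODA2 PHB, then carve the segment(s) consumed by the
 * reserved PE out of the bridge's memory resource.
 */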
static int pnv_ioda2_init_m64(struct pnv_phb *phb)
{
	const char *desc;
	struct resource *r;
	s64 rc;

	rc = opal_pci_set_phb_mem_window(phb->opal_id,
					 OPAL_M64_WINDOW_TYPE,
					 phb->ioda.m64_bar_idx,
					 phb->ioda.m64_base,
					 0,
					 phb->ioda.m64_size);
	if (rc != OPAL_SUCCESS) {
		desc = "configuring";
		goto fail;
	}

	rc = opal_pci_phb_mmio_enable(phb->opal_id,
				      OPAL_M64_WINDOW_TYPE,
				      phb->ioda.m64_bar_idx,
				      OPAL_ENABLE_M64_SPLIT);
	if (rc != OPAL_SUCCESS) {
		desc = "enabling";
		goto fail;
	}

	r = &phb->hose->mem_resources[1];
	if (phb->ioda.reserved_pe_idx == 0)
		r->start += (2 * phb->ioda.m64_segsize);
	else if (phb->ioda.reserved_pe_idx == (phb->ioda.total_pe_num - 1))
		r->end -= (2 * phb->ioda.m64_segsize);
	else
		pr_warn(" Cannot strip M64 segment for reserved PE#%x\n",
			phb->ioda.reserved_pe_idx);

	return 0;

fail:
	pr_warn(" Failure %lld %s M64 BAR#%d\n",
		rc, desc, phb->ioda.m64_bar_idx);
	opal_pci_phb_mmio_enable(phb->opal_id,
				 OPAL_M64_WINDOW_TYPE,
				 phb->ioda.m64_bar_idx,
				 OPAL_DISABLE_M64);
	return -EIO;
}

static void pnv_ioda_reserve_dev_m64_pe(struct pci_dev *pdev,
					unsigned long *pe_bitmap)
{
	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
	struct pnv_phb *phb = hose->private_data;
	struct resource *r;
	resource_size_t base, sgsz, start, end;
	int segno, i;

	base = phb->ioda.m64_base;
	sgsz = phb->ioda.m64_segsize;
	for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
		r = &pdev->resource[i];
		if (!r->parent || !pnv_pci_is_m64(phb, r))
			continue;

		start = ALIGN_DOWN(r->start - base, sgsz);
		end = ALIGN(r->end - base, sgsz);
		for (segno = start / sgsz; segno < end / sgsz; segno++) {
			if (pe_bitmap)
				set_bit(segno, pe_bitmap);
			else
				pnv_ioda_reserve_pe(phb, segno);
		}
	}
}

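/*
 * On IODA1 the M64 window is backed by PNV_IODA1_M64_NUM BARs, each
 * split into PNV_IODA1_M64_SEGS segments; configure and enable them all.
 */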
static int pnv_ioda1_init_m64(struct pnv_phb *phb)
{
	struct resource *r;
	int index;

	for (index = 0; index < PNV_IODA1_M64_NUM; index++) {
		unsigned long base, segsz = phb->ioda.m64_segsize;
		int64_t rc;

		base = phb->ioda.m64_base +
		       index * PNV_IODA1_M64_SEGS * segsz;
		rc = opal_pci_set_phb_mem_window(phb->opal_id,
				OPAL_M64_WINDOW_TYPE, index, base, 0,
				PNV_IODA1_M64_SEGS * segsz);
		if (rc != OPAL_SUCCESS) {
			pr_warn(" Error %lld setting M64 PHB#%x-BAR#%d\n",
				rc, phb->hose->global_number, index);
			goto fail;
		}

		rc = opal_pci_phb_mmio_enable(phb->opal_id,
				OPAL_M64_WINDOW_TYPE, index,
				OPAL_ENABLE_M64_SPLIT);
		if (rc != OPAL_SUCCESS) {
			pr_warn(" Error %lld enabling M64 PHB#%x-BAR#%d\n",
				rc, phb->hose->global_number, index);
			goto fail;
		}
	}

	r = &phb->hose->mem_resources[1];
	if (phb->ioda.reserved_pe_idx == 0)
		r->start += (2 * phb->ioda.m64_segsize);
	else if (phb->ioda.reserved_pe_idx == (phb->ioda.total_pe_num - 1))
		r->end -= (2 * phb->ioda.m64_segsize);
	else
		WARN(1, "Wrong reserved PE#%x on PHB#%x\n",
		     phb->ioda.reserved_pe_idx, phb->hose->global_number);

	return 0;

fail:
	for ( ; index >= 0; index--)
		opal_pci_phb_mmio_enable(phb->opal_id,
			OPAL_M64_WINDOW_TYPE, index, OPAL_DISABLE_M64);

	return -EIO;
}

static void pnv_ioda_reserve_m64_pe(struct pci_bus *bus,
				    unsigned long *pe_bitmap,
				    bool all)
{
	struct pci_dev *pdev;

	list_for_each_entry(pdev, &bus->devices, bus_list) {
		pnv_ioda_reserve_dev_m64_pe(pdev, pe_bitmap);

		if (all && pdev->subordinate)
			pnv_ioda_reserve_m64_pe(pdev->subordinate,
						pe_bitmap, all);
	}
}

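/*
 * Pick the PE(s) whose M64 segments are consumed by this bus. The first
 * PE found becomes the master; the rest are linked to it as slaves so
 * that the compound group is frozen and unfrozen together.
 */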
static struct pnv_ioda_pe *pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all)
{
	struct pci_controller *hose = pci_bus_to_host(bus);
	struct pnv_phb *phb = hose->private_data;
	struct pnv_ioda_pe *master_pe, *pe;
	unsigned long size, *pe_alloc;
	int i;

	if (pci_is_root_bus(bus))
		return NULL;

	size = ALIGN(phb->ioda.total_pe_num / 8, sizeof(unsigned long));
	pe_alloc = kzalloc(size, GFP_KERNEL);
	if (!pe_alloc) {
		pr_warn("%s: Out of memory!\n", __func__);
		return NULL;
	}

	pnv_ioda_reserve_m64_pe(bus, pe_alloc, all);

	if (bitmap_empty(pe_alloc, phb->ioda.total_pe_num)) {
		kfree(pe_alloc);
		return NULL;
	}

	master_pe = NULL;
	i = -1;
	while ((i = find_next_bit(pe_alloc, phb->ioda.total_pe_num, i + 1)) <
		phb->ioda.total_pe_num) {
		pe = &phb->ioda.pe_array[i];

		phb->ioda.m64_segmap[pe->pe_number] = pe->pe_number;
		if (!master_pe) {
			pe->flags |= PNV_IODA_PE_MASTER;
			INIT_LIST_HEAD(&pe->slaves);
			master_pe = pe;
		} else {
			pe->flags |= PNV_IODA_PE_SLAVE;
			pe->master = master_pe;
			list_add_tail(&pe->list, &master_pe->slaves);
		}

		if (phb->type == PNV_PHB_IODA1) {
			int64_t rc;

			rc = opal_pci_map_pe_mmio_window(phb->opal_id,
					pe->pe_number, OPAL_M64_WINDOW_TYPE,
					pe->pe_number / PNV_IODA1_M64_SEGS,
					pe->pe_number % PNV_IODA1_M64_SEGS);
			if (rc != OPAL_SUCCESS)
				pr_warn("%s: Error %lld mapping M64 for PHB#%x-PE#%x\n",
					__func__, rc, phb->hose->global_number,
					pe->pe_number);
		}
	}

	kfree(pe_alloc);
	return master_pe;
}

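/*
 * Parse the "ibm,opal-m64-window" property and the optional
 * "ibm,opal-available-m64-ranges" property, record the M64 window
 * geometry, and hook up the IODA-version-specific init functions.
 */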
static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb)
{
	struct pci_controller *hose = phb->hose;
	struct device_node *dn = hose->dn;
	struct resource *res;
	u32 m64_range[2], i;
	const __be32 *r;
	u64 pci_addr;

	if (phb->type != PNV_PHB_IODA1 && phb->type != PNV_PHB_IODA2) {
		pr_info(" M64 window not supported\n");
		return;
	}

	if (!firmware_has_feature(FW_FEATURE_OPAL)) {
		pr_info(" Firmware too old to support M64 window\n");
		return;
	}

	r = of_get_property(dn, "ibm,opal-m64-window", NULL);
	if (!r) {
		pr_info(" No <ibm,opal-m64-window> on %pOF\n", dn);
		return;
	}

	if (of_property_read_u32_array(dn, "ibm,opal-available-m64-ranges",
				       m64_range, 2)) {
		m64_range[0] = 0;
		m64_range[1] = 16;
	}

	if (m64_range[1] > 63) {
		pr_warn("%s: Limiting M64 range to 63 (from %d) on PHB#%x\n",
			__func__, m64_range[1], phb->hose->global_number);
		m64_range[1] = 63;
	}

	if (m64_range[1] <= m64_range[0]) {
		pr_warn("%s: M64 empty, disabling M64 usage on PHB#%x\n",
			__func__, phb->hose->global_number);
		return;
	}

	res = &hose->mem_resources[1];
	res->name = dn->full_name;
	res->start = of_translate_address(dn, r + 2);
	res->end = res->start + of_read_number(r + 4, 2) - 1;
	res->flags = (IORESOURCE_MEM | IORESOURCE_MEM_64 | IORESOURCE_PREFETCH);
	pci_addr = of_read_number(r, 2);
	hose->mem_offset[1] = res->start - pci_addr;

	phb->ioda.m64_size = resource_size(res);
	phb->ioda.m64_segsize = phb->ioda.m64_size / phb->ioda.total_pe_num;
	phb->ioda.m64_base = pci_addr;

	pr_info(" MEM 0x%016llx..0x%016llx -> 0x%016llx (M64 #%d..%d)\n",
		res->start, res->end, pci_addr, m64_range[0],
		m64_range[0] + m64_range[1] - 1);

	phb->ioda.m64_bar_alloc = (unsigned long)-1;

	m64_range[1]--;
	phb->ioda.m64_bar_idx = m64_range[0] + m64_range[1];

	pr_info(" Using M64 #%d as default window\n", phb->ioda.m64_bar_idx);

	for (i = m64_range[0]; i < m64_range[1]; i++)
		clear_bit(i, &phb->ioda.m64_bar_alloc);

	if (phb->type == PNV_PHB_IODA1)
		phb->init_m64 = pnv_ioda1_init_m64;
	else
		phb->init_m64 = pnv_ioda2_init_m64;
	phb->reserve_m64_pe = pnv_ioda_reserve_m64_pe;
	phb->pick_m64_pe = pnv_ioda_pick_m64_pe;
}

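/*
 * Freeze a PE for EEH purposes. If the PE is a slave, the freeze is
 * applied to its master first and then propagated to every slave in
 * the compound group.
 */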
static void pnv_ioda_freeze_pe(struct pnv_phb *phb, int pe_no)
{
	struct pnv_ioda_pe *pe = &phb->ioda.pe_array[pe_no];
	struct pnv_ioda_pe *slave;
	s64 rc;

	if (pe->flags & PNV_IODA_PE_SLAVE) {
		pe = pe->master;
		if (WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER)))
			return;

		pe_no = pe->pe_number;
	}

	rc = opal_pci_eeh_freeze_set(phb->opal_id,
				     pe_no,
				     OPAL_EEH_ACTION_SET_FREEZE_ALL);
	if (rc != OPAL_SUCCESS) {
		pr_warn("%s: Failure %lld freezing PHB#%x-PE#%x\n",
			__func__, rc, phb->hose->global_number, pe_no);
		return;
	}

	if (!(pe->flags & PNV_IODA_PE_MASTER))
		return;

	list_for_each_entry(slave, &pe->slaves, list) {
		rc = opal_pci_eeh_freeze_set(phb->opal_id,
					     slave->pe_number,
					     OPAL_EEH_ACTION_SET_FREEZE_ALL);
		if (rc != OPAL_SUCCESS)
			pr_warn("%s: Failure %lld freezing PHB#%x-PE#%x\n",
				__func__, rc, phb->hose->global_number,
				slave->pe_number);
	}
}

static int pnv_ioda_unfreeze_pe(struct pnv_phb *phb, int pe_no, int opt)
{
	struct pnv_ioda_pe *pe, *slave;
	s64 rc;

	pe = &phb->ioda.pe_array[pe_no];
	if (pe->flags & PNV_IODA_PE_SLAVE) {
		pe = pe->master;
		WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER));
		pe_no = pe->pe_number;
	}

	rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, opt);
	if (rc != OPAL_SUCCESS) {
		pr_warn("%s: Failure %lld clear %d on PHB#%x-PE#%x\n",
			__func__, rc, opt, phb->hose->global_number, pe_no);
		return -EIO;
	}

	if (!(pe->flags & PNV_IODA_PE_MASTER))
		return 0;

	list_for_each_entry(slave, &pe->slaves, list) {
		rc = opal_pci_eeh_freeze_clear(phb->opal_id,
					       slave->pe_number,
					       opt);
		if (rc != OPAL_SUCCESS) {
			pr_warn("%s: Failure %lld clear %d on PHB#%x-PE#%x\n",
				__func__, rc, opt, phb->hose->global_number,
				slave->pe_number);
			return -EIO;
		}
	}

	return 0;
}

static int pnv_ioda_get_pe_state(struct pnv_phb *phb, int pe_no)
{
	struct pnv_ioda_pe *slave, *pe;
	u8 fstate = 0, state;
	__be16 pcierr = 0;
	s64 rc;

	if (pe_no < 0 || pe_no >= phb->ioda.total_pe_num)
		return OPAL_EEH_STOPPED_PERM_UNAVAIL;

	pe = &phb->ioda.pe_array[pe_no];
	if (pe->flags & PNV_IODA_PE_SLAVE) {
		pe = pe->master;
		WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER));
		pe_no = pe->pe_number;
	}

	rc = opal_pci_eeh_freeze_status(phb->opal_id, pe_no,
					&state, &pcierr, NULL);
	if (rc != OPAL_SUCCESS) {
		pr_warn("%s: Failure %lld getting PHB#%x-PE#%x state\n",
			__func__, rc, phb->hose->global_number, pe_no);
		return OPAL_EEH_STOPPED_TEMP_UNAVAIL;
	}

	if (!(pe->flags & PNV_IODA_PE_MASTER))
		return state;

	list_for_each_entry(slave, &pe->slaves, list) {
		rc = opal_pci_eeh_freeze_status(phb->opal_id,
						slave->pe_number,
						&fstate,
						&pcierr,
						NULL);
		if (rc != OPAL_SUCCESS) {
			pr_warn("%s: Failure %lld getting PHB#%x-PE#%x state\n",
				__func__, rc, phb->hose->global_number,
				slave->pe_number);
			return OPAL_EEH_STOPPED_TEMP_UNAVAIL;
		}

		if (fstate > state)
			state = fstate;
	}

	return state;
}

struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev)
{
	struct pci_controller *hose = pci_bus_to_host(dev->bus);
	struct pnv_phb *phb = hose->private_data;
	struct pci_dn *pdn = pci_get_pdn(dev);

	if (!pdn)
		return NULL;
	if (pdn->pe_number == IODA_INVALID_PE)
		return NULL;
	return &phb->ioda.pe_array[pdn->pe_number];
}

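/*
 * Add or remove one parent/child pair in the PELT-V, which tells the
 * hardware which other PEs to freeze alongside a given PE. Compound
 * slave PEs are updated together with their master.
 */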
static int pnv_ioda_set_one_peltv(struct pnv_phb *phb,
				  struct pnv_ioda_pe *parent,
				  struct pnv_ioda_pe *child,
				  bool is_add)
{
	const char *desc = is_add ? "adding" : "removing";
	uint8_t op = is_add ? OPAL_ADD_PE_TO_DOMAIN :
			      OPAL_REMOVE_PE_FROM_DOMAIN;
	struct pnv_ioda_pe *slave;
	long rc;

	rc = opal_pci_set_peltv(phb->opal_id, parent->pe_number,
				child->pe_number, op);
	if (rc != OPAL_SUCCESS) {
		pe_warn(child, "OPAL error %ld %s to parent PELTV\n",
			rc, desc);
		return -ENXIO;
	}

	if (!(child->flags & PNV_IODA_PE_MASTER))
		return 0;

	list_for_each_entry(slave, &child->slaves, list) {
		rc = opal_pci_set_peltv(phb->opal_id, parent->pe_number,
					slave->pe_number, op);
		if (rc != OPAL_SUCCESS) {
			pe_warn(slave, "OPAL error %ld %s to parent PELTV\n",
				rc, desc);
			return -ENXIO;
		}
	}

	return 0;
}

static int pnv_ioda_set_peltv(struct pnv_phb *phb,
			      struct pnv_ioda_pe *pe,
			      bool is_add)
{
	struct pnv_ioda_pe *slave;
	struct pci_dev *pdev = NULL;
	int ret;

	if (is_add) {
		opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number,
					  OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
		if (pe->flags & PNV_IODA_PE_MASTER) {
			list_for_each_entry(slave, &pe->slaves, list)
				opal_pci_eeh_freeze_clear(phb->opal_id,
							  slave->pe_number,
							  OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
		}
	}

	ret = pnv_ioda_set_one_peltv(phb, pe, pe, is_add);
	if (ret)
		return ret;

	if (pe->flags & PNV_IODA_PE_MASTER) {
		list_for_each_entry(slave, &pe->slaves, list) {
			ret = pnv_ioda_set_one_peltv(phb, slave, pe, is_add);
			if (ret)
				return ret;
		}
	}

	if (pe->flags & (PNV_IODA_PE_BUS_ALL | PNV_IODA_PE_BUS))
		pdev = pe->pbus->self;
	else if (pe->flags & PNV_IODA_PE_DEV)
		pdev = pe->pdev->bus->self;
#ifdef CONFIG_PCI_IOV
	else if (pe->flags & PNV_IODA_PE_VF)
		pdev = pe->parent_dev;
#endif
	while (pdev) {
		struct pci_dn *pdn = pci_get_pdn(pdev);
		struct pnv_ioda_pe *parent;

		if (pdn && pdn->pe_number != IODA_INVALID_PE) {
			parent = &phb->ioda.pe_array[pdn->pe_number];
			ret = pnv_ioda_set_one_peltv(phb, parent, pe, is_add);
			if (ret)
				return ret;
		}

		pdev = pdev->bus->self;
	}

	return 0;
}

static void pnv_ioda_unset_peltv(struct pnv_phb *phb,
				 struct pnv_ioda_pe *pe,
				 struct pci_dev *parent)
{
	int64_t rc;

	while (parent) {
		struct pci_dn *pdn = pci_get_pdn(parent);

		if (pdn && pdn->pe_number != IODA_INVALID_PE) {
			rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number,
						pe->pe_number,
						OPAL_REMOVE_PE_FROM_DOMAIN);
		}
		parent = parent->bus->self;
	}

	opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number,
				  OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);

	rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number,
				pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN);
	if (rc)
		pe_warn(pe, "OPAL error %lld remove self from PELTV\n", rc);
}

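/*
 * Undo the RID-to-PE mapping for a PE: recompute the bus/device/function
 * compare masks that were used at configure time, invalidate the reverse
 * map, drop the PELT-V entries and unmap the PE in OPAL.
 */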
static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
{
	struct pci_dev *parent;
	uint8_t bcomp, dcomp, fcomp;
	int64_t rc;
	long rid_end, rid;

	if (pe->pbus) {
		int count;

		dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER;
		fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER;
		parent = pe->pbus->self;
		if (pe->flags & PNV_IODA_PE_BUS_ALL)
			count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1;
		else
			count = 1;

		switch(count) {
		case 1: bcomp = OpalPciBusAll; break;
		case 2: bcomp = OpalPciBus7Bits; break;
		case 4: bcomp = OpalPciBus6Bits; break;
		case 8: bcomp = OpalPciBus5Bits; break;
		case 16: bcomp = OpalPciBus4Bits; break;
		case 32: bcomp = OpalPciBus3Bits; break;
		default:
			dev_err(&pe->pbus->dev, "Number of subordinate buses %d unsupported\n",
				count);
			bcomp = OpalPciBusAll;
		}
		rid_end = pe->rid + (count << 8);
	} else {
#ifdef CONFIG_PCI_IOV
		if (pe->flags & PNV_IODA_PE_VF)
			parent = pe->parent_dev;
		else
#endif
			parent = pe->pdev->bus->self;
		bcomp = OpalPciBusAll;
		dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER;
		fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER;
		rid_end = pe->rid + 1;
	}

	for (rid = pe->rid; rid < rid_end; rid++)
		phb->ioda.pe_rmap[rid] = IODA_INVALID_PE;

	if (phb->type != PNV_PHB_NPU_NVLINK && phb->type != PNV_PHB_NPU_OCAPI)
		pnv_ioda_unset_peltv(phb, pe, parent);

	rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
			     bcomp, dcomp, fcomp, OPAL_UNMAP_PE);
	if (rc)
		pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc);

	pe->pbus = NULL;
	pe->pdev = NULL;
#ifdef CONFIG_PCI_IOV
	pe->parent_dev = NULL;
#endif

	return 0;
}

static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
{
	struct pci_dev *parent;
	uint8_t bcomp, dcomp, fcomp;
	long rc, rid_end, rid;

	if (pe->pbus) {
		int count;

		dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER;
		fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER;
		parent = pe->pbus->self;
		if (pe->flags & PNV_IODA_PE_BUS_ALL)
			count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1;
		else
			count = 1;

		switch(count) {
		case 1: bcomp = OpalPciBusAll; break;
		case 2: bcomp = OpalPciBus7Bits; break;
		case 4: bcomp = OpalPciBus6Bits; break;
		case 8: bcomp = OpalPciBus5Bits; break;
		case 16: bcomp = OpalPciBus4Bits; break;
		case 32: bcomp = OpalPciBus3Bits; break;
		default:
			dev_err(&pe->pbus->dev, "Number of subordinate buses %d unsupported\n",
				count);
			bcomp = OpalPciBusAll;
		}
		rid_end = pe->rid + (count << 8);
	} else {
#ifdef CONFIG_PCI_IOV
		if (pe->flags & PNV_IODA_PE_VF)
			parent = pe->parent_dev;
		else
#endif
			parent = pe->pdev->bus->self;
		bcomp = OpalPciBusAll;
		dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER;
		fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER;
		rid_end = pe->rid + 1;
	}

	rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
			     bcomp, dcomp, fcomp, OPAL_MAP_PE);
	if (rc) {
		pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc);
		return -ENXIO;
	}

	if (phb->type != PNV_PHB_NPU_NVLINK && phb->type != PNV_PHB_NPU_OCAPI)
		pnv_ioda_set_peltv(phb, pe, true);

	for (rid = pe->rid; rid < rid_end; rid++)
		phb->ioda.pe_rmap[rid] = pe->pe_number;

	if (phb->type != PNV_PHB_IODA1) {
		pe->mve_number = 0;
		goto out;
	}

	pe->mve_number = pe->pe_number;
	rc = opal_pci_set_mve(phb->opal_id, pe->mve_number, pe->pe_number);
	if (rc != OPAL_SUCCESS) {
		pe_err(pe, "OPAL error %ld setting up MVE %x\n",
		       rc, pe->mve_number);
		pe->mve_number = -1;
	} else {
		rc = opal_pci_set_mve_enable(phb->opal_id,
					     pe->mve_number, OPAL_ENABLE_MVE);
		if (rc) {
			pe_err(pe, "OPAL error %ld enabling MVE %x\n",
			       rc, pe->mve_number);
			pe->mve_number = -1;
		}
	}

out:
	return 0;
}

#ifdef CONFIG_PCI_IOV
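/*
 * Shift the VF BAR windows of a PF by "offset" PE numbers so each VF
 * lands in its own PE segment. The vacated range is reserved so the
 * generic PCI code cannot hand it out again.
 */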
static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
{
	struct pci_dn *pdn = pci_get_pdn(dev);
	int i;
	struct resource *res, res2;
	resource_size_t size;
	u16 num_vfs;

	if (!dev->is_physfn)
		return -EINVAL;

	num_vfs = pdn->num_vfs;
	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
		res = &dev->resource[i + PCI_IOV_RESOURCES];
		if (!res->flags || !res->parent)
			continue;

		size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
		res2.flags = res->flags;
		res2.start = res->start + (size * offset);
		res2.end = res2.start + (size * num_vfs) - 1;

		if (res2.end > res->end) {
			dev_err(&dev->dev, "VF BAR%d: %pR would extend past %pR (trying to enable %d VFs shifted by %d)\n",
				i, &res2, res, num_vfs, offset);
			return -EBUSY;
		}
	}

	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
		res = &dev->resource[i + PCI_IOV_RESOURCES];
		if (!res->flags || !res->parent)
			continue;

		size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
		res2 = *res;
		res->start += size * offset;

		dev_info(&dev->dev, "VF BAR%d: %pR shifted to %pR (%sabling %d VFs shifted by %d)\n",
			 i, &res2, res, (offset > 0) ? "En" : "Dis",
			 num_vfs, offset);

		if (offset < 0) {
			devm_release_resource(&dev->dev, &pdn->holes[i]);
			memset(&pdn->holes[i], 0, sizeof(pdn->holes[i]));
		}

		pci_update_resource(dev, i + PCI_IOV_RESOURCES);

		if (offset > 0) {
			pdn->holes[i].start = res2.start;
			pdn->holes[i].end = res2.start + size * offset - 1;
			pdn->holes[i].flags = IORESOURCE_BUS;
			pdn->holes[i].name = "pnv_iov_reserved";
			devm_request_resource(&dev->dev, res->parent,
					      &pdn->holes[i]);
		}
	}
	return 0;
}
#endif

static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
{
	struct pci_controller *hose = pci_bus_to_host(dev->bus);
	struct pnv_phb *phb = hose->private_data;
	struct pci_dn *pdn = pci_get_pdn(dev);
	struct pnv_ioda_pe *pe;

	if (!pdn) {
		pr_err("%s: Device tree node not associated properly\n",
		       pci_name(dev));
		return NULL;
	}
	if (pdn->pe_number != IODA_INVALID_PE)
		return NULL;

	pe = pnv_ioda_alloc_pe(phb);
	if (!pe) {
		pr_warn("%s: Not enough PE# available, disabling device\n",
			pci_name(dev));
		return NULL;
	}

	pdn->pe_number = pe->pe_number;
	pe->flags = PNV_IODA_PE_DEV;
	pe->pdev = dev;
	pe->pbus = NULL;
	pe->mve_number = -1;
	pe->rid = dev->bus->number << 8 | pdn->devfn;
	pe->device_count++;

	pe_info(pe, "Associated device to PE\n");

	if (pnv_ioda_configure_pe(phb, pe)) {
		pnv_ioda_free_pe(pe);
		pdn->pe_number = IODA_INVALID_PE;
		pe->pdev = NULL;
		return NULL;
	}

	mutex_lock(&phb->ioda.pe_list_mutex);
	list_add_tail(&pe->list, &phb->ioda.pe_list);
	mutex_unlock(&phb->ioda.pe_list_mutex);
	return pe;
}

static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
{
	struct pci_dev *dev;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		struct pci_dn *pdn = pci_get_pdn(dev);

		if (pdn == NULL) {
			pr_warn("%s: No device node associated with device!\n",
				pci_name(dev));
			continue;
		}

		if (pdn->pe_number != IODA_INVALID_PE)
			continue;

		pe->device_count++;
		pdn->pe_number = pe->pe_number;
		if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
			pnv_ioda_setup_same_PE(dev->subordinate, pe);
	}
}

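/*
 * Set up a PE that covers a whole PCI bus ("all" also pulls in every
 * subordinate bus, as needed below PCIe-to-PCI bridges and switch
 * downstream ports). An existing PE is reused when the bus already maps
 * to one, e.g. via an M64 segment or for the root bus.
 */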
static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
{
	struct pci_controller *hose = pci_bus_to_host(bus);
	struct pnv_phb *phb = hose->private_data;
	struct pnv_ioda_pe *pe = NULL;
	unsigned int pe_num;

	pe_num = phb->ioda.pe_rmap[bus->number << 8];
	if (pe_num != IODA_INVALID_PE) {
		pe = &phb->ioda.pe_array[pe_num];
		pnv_ioda_setup_same_PE(bus, pe);
		return NULL;
	}

	if (pci_is_root_bus(bus) &&
	    phb->ioda.root_pe_idx != IODA_INVALID_PE)
		pe = &phb->ioda.pe_array[phb->ioda.root_pe_idx];

	if (!pe && phb->pick_m64_pe)
		pe = phb->pick_m64_pe(bus, all);

	if (!pe)
		pe = pnv_ioda_alloc_pe(phb);

	if (!pe) {
		pr_warn("%s: Not enough PE# available for PCI bus %04x:%02x\n",
			__func__, pci_domain_nr(bus), bus->number);
		return NULL;
	}

	pe->flags |= (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS);
	pe->pbus = bus;
	pe->pdev = NULL;
	pe->mve_number = -1;
	pe->rid = bus->busn_res.start << 8;

	if (all)
		pe_info(pe, "Secondary bus %d..%d associated with PE#%x\n",
			bus->busn_res.start, bus->busn_res.end, pe->pe_number);
	else
		pe_info(pe, "Secondary bus %d associated with PE#%x\n",
			bus->busn_res.start, pe->pe_number);

	if (pnv_ioda_configure_pe(phb, pe)) {
		pnv_ioda_free_pe(pe);
		pe->pbus = NULL;
		return NULL;
	}

	pnv_ioda_setup_same_PE(bus, pe);

	list_add_tail(&pe->list, &phb->ioda.pe_list);

	return pe;
}

static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev)
{
	int pe_num, found_pe = false, rc;
	long rid;
	struct pnv_ioda_pe *pe;
	struct pci_dev *gpu_pdev;
	struct pci_dn *npu_pdn;
	struct pci_controller *hose = pci_bus_to_host(npu_pdev->bus);
	struct pnv_phb *phb = hose->private_data;

	pci_dev_get(npu_pdev);

	gpu_pdev = pnv_pci_get_gpu_dev(npu_pdev);
	for (pe_num = 0; pe_num < phb->ioda.total_pe_num; pe_num++) {
		pe = &phb->ioda.pe_array[pe_num];
		if (!pe->pdev)
			continue;

		if (pnv_pci_get_gpu_dev(pe->pdev) == gpu_pdev) {
			dev_info(&npu_pdev->dev,
				"Associating to existing PE %x\n", pe_num);
			npu_pdn = pci_get_pdn(npu_pdev);
			rid = npu_pdev->bus->number << 8 | npu_pdn->devfn;
			npu_pdn->pe_number = pe_num;
			phb->ioda.pe_rmap[rid] = pe->pe_number;
			pe->device_count++;

			rc = opal_pci_set_pe(phb->opal_id, pe_num, rid,
					     OpalPciBusAll,
					     OPAL_COMPARE_RID_DEVICE_NUMBER,
					     OPAL_COMPARE_RID_FUNCTION_NUMBER,
					     OPAL_MAP_PE);
			WARN_ON(rc != OPAL_SUCCESS);
			found_pe = true;
			break;
		}
	}

	if (!found_pe)
		return pnv_ioda_setup_dev_PE(npu_pdev);
	else
		return pe;
}

static void pnv_ioda_setup_npu_PEs(struct pci_bus *bus)
{
	struct pci_dev *pdev;

	list_for_each_entry(pdev, &bus->devices, bus_list)
		pnv_ioda_setup_npu_PE(pdev);
}

static void pnv_pci_ioda_setup_PEs(void)
{
	struct pci_controller *hose;
	struct pnv_phb *phb;
	struct pnv_ioda_pe *pe;

	list_for_each_entry(hose, &hose_list, list_node) {
		phb = hose->private_data;
		if (phb->type == PNV_PHB_NPU_NVLINK) {
			pnv_ioda_reserve_pe(phb, 0);
			pnv_ioda_setup_npu_PEs(hose->bus);
			if (phb->model == PNV_PHB_MODEL_NPU2)
				WARN_ON_ONCE(pnv_npu2_init(hose));
		}
	}
	list_for_each_entry(hose, &hose_list, list_node) {
		phb = hose->private_data;
		if (phb->type != PNV_PHB_IODA2)
			continue;

		list_for_each_entry(pe, &phb->ioda.pe_list, list)
			pnv_npu2_map_lpar(pe, MSR_DR | MSR_PR | MSR_HV);
	}
}

#ifdef CONFIG_PCI_IOV
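/*
 * Disable and free every M64 BAR that was assigned to this PF's VFs
 * (one BAR in shared mode, one per VF in single mode).
 */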
static int pnv_pci_vf_release_m64(struct pci_dev *pdev, u16 num_vfs)
{
	struct pci_bus *bus;
	struct pci_controller *hose;
	struct pnv_phb *phb;
	struct pci_dn *pdn;
	int i, j;
	int m64_bars;

	bus = pdev->bus;
	hose = pci_bus_to_host(bus);
	phb = hose->private_data;
	pdn = pci_get_pdn(pdev);

	if (pdn->m64_single_mode)
		m64_bars = num_vfs;
	else
		m64_bars = 1;

	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
		for (j = 0; j < m64_bars; j++) {
			if (pdn->m64_map[j][i] == IODA_INVALID_M64)
				continue;
			opal_pci_phb_mmio_enable(phb->opal_id,
				OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 0);
			clear_bit(pdn->m64_map[j][i], &phb->ioda.m64_bar_alloc);
			pdn->m64_map[j][i] = IODA_INVALID_M64;
		}

	kfree(pdn->m64_map);
	return 0;
}

static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
{
	struct pci_bus *bus;
	struct pci_controller *hose;
	struct pnv_phb *phb;
	struct pci_dn *pdn;
	unsigned int win;
	struct resource *res;
	int i, j;
	int64_t rc;
	int total_vfs;
	resource_size_t size, start;
	int pe_num;
	int m64_bars;

	bus = pdev->bus;
	hose = pci_bus_to_host(bus);
	phb = hose->private_data;
	pdn = pci_get_pdn(pdev);
	total_vfs = pci_sriov_get_totalvfs(pdev);

	if (pdn->m64_single_mode)
		m64_bars = num_vfs;
	else
		m64_bars = 1;

	pdn->m64_map = kmalloc_array(m64_bars,
				     sizeof(*pdn->m64_map),
				     GFP_KERNEL);
	if (!pdn->m64_map)
		return -ENOMEM;
	for (i = 0; i < m64_bars; i++)
		for (j = 0; j < PCI_SRIOV_NUM_BARS; j++)
			pdn->m64_map[i][j] = IODA_INVALID_M64;

	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
		res = &pdev->resource[i + PCI_IOV_RESOURCES];
		if (!res->flags || !res->parent)
			continue;

		for (j = 0; j < m64_bars; j++) {
			do {
				win = find_next_zero_bit(&phb->ioda.m64_bar_alloc,
						phb->ioda.m64_bar_idx + 1, 0);

				if (win >= phb->ioda.m64_bar_idx + 1)
					goto m64_failed;
			} while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc));

			pdn->m64_map[j][i] = win;

			if (pdn->m64_single_mode) {
				size = pci_iov_resource_size(pdev,
							PCI_IOV_RESOURCES + i);
				start = res->start + size * j;
			} else {
				size = resource_size(res);
				start = res->start;
			}

			if (pdn->m64_single_mode) {
				pe_num = pdn->pe_num_map[j];
				rc = opal_pci_map_pe_mmio_window(phb->opal_id,
						pe_num, OPAL_M64_WINDOW_TYPE,
						pdn->m64_map[j][i], 0);
			}

			rc = opal_pci_set_phb_mem_window(phb->opal_id,
						 OPAL_M64_WINDOW_TYPE,
						 pdn->m64_map[j][i],
						 start,
						 0,
						 size);

			if (rc != OPAL_SUCCESS) {
				dev_err(&pdev->dev, "Failed to map M64 window #%d: %lld\n",
					win, rc);
				goto m64_failed;
			}

			if (pdn->m64_single_mode)
				rc = opal_pci_phb_mmio_enable(phb->opal_id,
				     OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 2);
			else
				rc = opal_pci_phb_mmio_enable(phb->opal_id,
				     OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 1);

			if (rc != OPAL_SUCCESS) {
				dev_err(&pdev->dev, "Failed to enable M64 window #%d: %llx\n",
					win, rc);
				goto m64_failed;
			}
		}
	}
	return 0;

m64_failed:
	pnv_pci_vf_release_m64(pdev, num_vfs);
	return -EBUSY;
}

static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group,
		int num);

static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe *pe)
{
	struct iommu_table *tbl;
	int64_t rc;

	tbl = pe->table_group.tables[0];
	rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0);
	if (rc)
		pe_warn(pe, "OPAL error %ld release DMA window\n", rc);

	pnv_pci_ioda2_set_bypass(pe, false);
	if (pe->table_group.group) {
		iommu_group_put(pe->table_group.group);
		BUG_ON(pe->table_group.group);
	}
	iommu_tce_table_put(tbl);
}

static void pnv_ioda_release_vf_PE(struct pci_dev *pdev)
{
	struct pci_bus *bus;
	struct pci_controller *hose;
	struct pnv_phb *phb;
	struct pnv_ioda_pe *pe, *pe_n;
	struct pci_dn *pdn;

	bus = pdev->bus;
	hose = pci_bus_to_host(bus);
	phb = hose->private_data;
	pdn = pci_get_pdn(pdev);

	if (!pdev->is_physfn)
		return;

	list_for_each_entry_safe(pe, pe_n, &phb->ioda.pe_list, list) {
		if (pe->parent_dev != pdev)
			continue;

		pnv_pci_ioda2_release_dma_pe(pdev, pe);

		mutex_lock(&phb->ioda.pe_list_mutex);
		list_del(&pe->list);
		mutex_unlock(&phb->ioda.pe_list_mutex);

		pnv_ioda_deconfigure_pe(phb, pe);

		pnv_ioda_free_pe(pe);
	}
}

static void pnv_pci_sriov_disable(struct pci_dev *pdev)
{
	struct pci_bus *bus;
	struct pci_controller *hose;
	struct pnv_phb *phb;
	struct pnv_ioda_pe *pe;
	struct pci_dn *pdn;
	u16 num_vfs, i;

	bus = pdev->bus;
	hose = pci_bus_to_host(bus);
	phb = hose->private_data;
	pdn = pci_get_pdn(pdev);
	num_vfs = pdn->num_vfs;

	pnv_ioda_release_vf_PE(pdev);

	if (phb->type == PNV_PHB_IODA2) {
		if (!pdn->m64_single_mode)
			pnv_pci_vf_resource_shift(pdev, -*pdn->pe_num_map);

		pnv_pci_vf_release_m64(pdev, num_vfs);

		if (pdn->m64_single_mode) {
			for (i = 0; i < num_vfs; i++) {
				if (pdn->pe_num_map[i] == IODA_INVALID_PE)
					continue;

				pe = &phb->ioda.pe_array[pdn->pe_num_map[i]];
				pnv_ioda_free_pe(pe);
			}
		} else
			bitmap_clear(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs);

		kfree(pdn->pe_num_map);
	}
}

static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
				       struct pnv_ioda_pe *pe);
#ifdef CONFIG_IOMMU_API
static void pnv_ioda_setup_bus_iommu_group(struct pnv_ioda_pe *pe,
		struct iommu_table_group *table_group, struct pci_bus *bus);

#endif
static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
{
	struct pci_bus *bus;
	struct pci_controller *hose;
	struct pnv_phb *phb;
	struct pnv_ioda_pe *pe;
	int pe_num;
	u16 vf_index;
	struct pci_dn *pdn;

	bus = pdev->bus;
	hose = pci_bus_to_host(bus);
	phb = hose->private_data;
	pdn = pci_get_pdn(pdev);

	if (!pdev->is_physfn)
		return;

	for (vf_index = 0; vf_index < num_vfs; vf_index++) {
		int vf_devfn = pci_iov_virtfn_devfn(pdev, vf_index);
		int vf_bus = pci_iov_virtfn_bus(pdev, vf_index);
		struct pci_dn *vf_pdn;

		if (pdn->m64_single_mode)
			pe_num = pdn->pe_num_map[vf_index];
		else
			pe_num = *pdn->pe_num_map + vf_index;

		pe = &phb->ioda.pe_array[pe_num];
		pe->pe_number = pe_num;
		pe->phb = phb;
		pe->flags = PNV_IODA_PE_VF;
		pe->pbus = NULL;
		pe->parent_dev = pdev;
		pe->mve_number = -1;
		pe->rid = (vf_bus << 8) | vf_devfn;

		pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%x\n",
			hose->global_number, pdev->bus->number,
			PCI_SLOT(vf_devfn), PCI_FUNC(vf_devfn), pe_num);

		if (pnv_ioda_configure_pe(phb, pe)) {
			pnv_ioda_free_pe(pe);
			pe->pdev = NULL;
			continue;
		}

		mutex_lock(&phb->ioda.pe_list_mutex);
		list_add_tail(&pe->list, &phb->ioda.pe_list);
		mutex_unlock(&phb->ioda.pe_list_mutex);

		list_for_each_entry(vf_pdn, &pdn->parent->child_list, list) {
			if (vf_pdn->busno == vf_bus &&
			    vf_pdn->devfn == vf_devfn) {
				vf_pdn->pe_number = pe_num;
				break;
			}
		}

		pnv_pci_ioda2_setup_dma_pe(phb, pe);
#ifdef CONFIG_IOMMU_API
		iommu_register_group(&pe->table_group,
				pe->phb->hose->global_number, pe->pe_number);
		pnv_ioda_setup_bus_iommu_group(pe, &pe->table_group, NULL);
#endif
	}
}

static int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
{
	struct pci_bus *bus;
	struct pci_controller *hose;
	struct pnv_phb *phb;
	struct pnv_ioda_pe *pe;
	struct pci_dn *pdn;
	int ret;
	u16 i;

	bus = pdev->bus;
	hose = pci_bus_to_host(bus);
	phb = hose->private_data;
	pdn = pci_get_pdn(pdev);

	if (phb->type == PNV_PHB_IODA2) {
		if (!pdn->vfs_expanded) {
			dev_info(&pdev->dev,
				 "SR-IOV is not supported on this device: IOV BAR is not 64-bit prefetchable\n");
			return -ENOSPC;
		}

		if (pdn->m64_single_mode && num_vfs > phb->ioda.m64_bar_idx) {
			dev_info(&pdev->dev, "Not enough M64 BAR for VFs\n");
			return -EBUSY;
		}

		if (pdn->m64_single_mode)
			pdn->pe_num_map = kmalloc_array(num_vfs,
					sizeof(*pdn->pe_num_map),
					GFP_KERNEL);
		else
			pdn->pe_num_map = kmalloc(sizeof(*pdn->pe_num_map), GFP_KERNEL);

		if (!pdn->pe_num_map)
			return -ENOMEM;

		if (pdn->m64_single_mode)
			for (i = 0; i < num_vfs; i++)
				pdn->pe_num_map[i] = IODA_INVALID_PE;

		if (pdn->m64_single_mode) {
			for (i = 0; i < num_vfs; i++) {
				pe = pnv_ioda_alloc_pe(phb);
				if (!pe) {
					ret = -EBUSY;
					goto m64_failed;
				}

				pdn->pe_num_map[i] = pe->pe_number;
			}
		} else {
			mutex_lock(&phb->ioda.pe_alloc_mutex);
			*pdn->pe_num_map = bitmap_find_next_zero_area(
				phb->ioda.pe_alloc, phb->ioda.total_pe_num,
				0, num_vfs, 0);
			if (*pdn->pe_num_map >= phb->ioda.total_pe_num) {
				mutex_unlock(&phb->ioda.pe_alloc_mutex);
				dev_info(&pdev->dev, "Failed to enable %d VFs\n", num_vfs);
				kfree(pdn->pe_num_map);
				return -EBUSY;
			}
			bitmap_set(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs);
			mutex_unlock(&phb->ioda.pe_alloc_mutex);
		}
		pdn->num_vfs = num_vfs;

		ret = pnv_pci_vf_assign_m64(pdev, num_vfs);
		if (ret) {
			dev_info(&pdev->dev, "Not enough M64 window resources\n");
			goto m64_failed;
		}

		if (!pdn->m64_single_mode) {
			ret = pnv_pci_vf_resource_shift(pdev, *pdn->pe_num_map);
			if (ret)
				goto m64_failed;
		}
	}

	pnv_ioda_setup_vf_PE(pdev, num_vfs);

	return 0;

m64_failed:
	if (pdn->m64_single_mode) {
		for (i = 0; i < num_vfs; i++) {
			if (pdn->pe_num_map[i] == IODA_INVALID_PE)
				continue;

			pe = &phb->ioda.pe_array[pdn->pe_num_map[i]];
			pnv_ioda_free_pe(pe);
		}
	} else
		bitmap_clear(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs);

	kfree(pdn->pe_num_map);

	return ret;
}

static int pnv_pcibios_sriov_disable(struct pci_dev *pdev)
{
	pnv_pci_sriov_disable(pdev);

	remove_sriov_vf_pdns(pdev);
	return 0;
}

static int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
{
	add_sriov_vf_pdns(pdev);

	return pnv_pci_sriov_enable(pdev, num_vfs);
}
#endif

static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev)
{
	struct pci_dn *pdn = pci_get_pdn(pdev);
	struct pnv_ioda_pe *pe;

	if (!pdn || pdn->pe_number == IODA_INVALID_PE)
		return;

	pe = &phb->ioda.pe_array[pdn->pe_number];
	WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
	pdev->dev.archdata.dma_offset = pe->tce_bypass_base;
	set_iommu_table_base(&pdev->dev, pe->table_group.tables[0]);
}

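/*
 * Reconfigure TVE#0 as a 64-bit DMA space for this PE by installing a
 * static TCE table that linearly maps all of system memory at a +4GB
 * offset, using huge (1 << tce_order, i.e. 256MB) TCEs. A device whose
 * DMA mask covers memory_hotplug_max() + 4GB can then use TVE#0 without
 * per-mapping TCE updates. This only works on PHB3 (POWER8).
 */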
static int pnv_pci_ioda_dma_64bit_bypass(struct pnv_ioda_pe *pe)
{
	u64 window_size, table_size, tce_count, addr;
	struct page *table_pages;
	u64 tce_order = 28;
	__be64 *tces;
	s64 rc;

	window_size = roundup_pow_of_two(memory_hotplug_max() + (1ULL << 32));
	tce_count = window_size >> tce_order;
	table_size = tce_count << 3;

	if (table_size < PAGE_SIZE)
		table_size = PAGE_SIZE;

	table_pages = alloc_pages_node(pe->phb->hose->node, GFP_KERNEL,
				       get_order(table_size));
	if (!table_pages)
		goto err;

	tces = page_address(table_pages);
	if (!tces)
		goto err;

	memset(tces, 0, table_size);

	for (addr = 0; addr < memory_hotplug_max(); addr += (1 << tce_order)) {
		tces[(addr + (1ULL << 32)) >> tce_order] =
			cpu_to_be64(addr | TCE_PCI_READ | TCE_PCI_WRITE);
	}

	rc = opal_pci_map_pe_dma_window(pe->phb->opal_id,
					pe->pe_number,
					(pe->pe_number << 1) + 0,
					1,
					__pa(tces),
					table_size,
					1 << tce_order);
	if (rc == OPAL_SUCCESS) {
		pe_info(pe, "Using 64-bit DMA iommu bypass (through TVE#0)\n");
		return 0;
	}
err:
	pe_err(pe, "Error configuring 64-bit DMA bypass\n");
	return -EIO;
}

static bool pnv_pci_ioda_iommu_bypass_supported(struct pci_dev *pdev,
		u64 dma_mask)
{
	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
	struct pnv_phb *phb = hose->private_data;
	struct pci_dn *pdn = pci_get_pdn(pdev);
	struct pnv_ioda_pe *pe;

	if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
		return false;

	pe = &phb->ioda.pe_array[pdn->pe_number];
	if (pe->tce_bypass_enabled) {
		u64 top = pe->tce_bypass_base + memblock_end_of_DRAM() - 1;
		if (dma_mask >= top)
			return true;
	}

	if (dma_mask >> 32 &&
	    dma_mask > (memory_hotplug_max() + (1ULL << 32)) &&
	    (pe->device_count == 1 || !pe->pbus) &&
	    phb->model == PNV_PHB_MODEL_PHB3) {
		s64 rc = pnv_pci_ioda_dma_64bit_bypass(pe);
		if (rc)
			return false;

		pdev->dev.archdata.dma_offset = (1ULL << 32);
		return true;
	}

	return false;
}

static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
{
	struct pci_dev *dev;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		set_iommu_table_base(&dev->dev, pe->table_group.tables[0]);
		dev->dev.archdata.dma_offset = pe->tce_bypass_base;

		if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
			pnv_ioda_setup_bus_dma(pe, dev->subordinate);
	}
}

static inline __be64 __iomem *pnv_ioda_get_inval_reg(struct pnv_phb *phb,
						     bool real_mode)
{
	return real_mode ? (__be64 __iomem *)(phb->regs_phys + 0x210) :
		(phb->regs + 0x210);
}

static void pnv_pci_p7ioc_tce_invalidate(struct iommu_table *tbl,
		unsigned long index, unsigned long npages, bool rm)
{
	struct iommu_table_group_link *tgl = list_first_entry_or_null(
			&tbl->it_group_list, struct iommu_table_group_link,
			next);
	struct pnv_ioda_pe *pe = container_of(tgl->table_group,
			struct pnv_ioda_pe, table_group);
	__be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb, rm);
	unsigned long start, end, inc;

	start = __pa(((__be64 *)tbl->it_base) + index - tbl->it_offset);
	end = __pa(((__be64 *)tbl->it_base) + index - tbl->it_offset +
			npages - 1);

	start |= (1ull << 63);
	end |= (1ull << 63);
	inc = 16;
	end |= inc - 1;

	mb();
	while (start <= end) {
		if (rm)
			__raw_rm_writeq_be(start, invalidate);
		else
			__raw_writeq_be(start, invalidate);

		start += inc;
	}
}

static int pnv_ioda1_tce_build(struct iommu_table *tbl, long index,
			       long npages, unsigned long uaddr,
			       enum dma_data_direction direction,
			       unsigned long attrs)
{
	int ret = pnv_tce_build(tbl, index, npages, uaddr, direction,
			attrs);

	if (!ret)
		pnv_pci_p7ioc_tce_invalidate(tbl, index, npages, false);

	return ret;
}

#ifdef CONFIG_IOMMU_API
static int pnv_ioda_tce_xchg_no_kill(struct iommu_table *tbl, long index,
		unsigned long *hpa, enum dma_data_direction *direction,
		bool realmode)
{
	return pnv_tce_xchg(tbl, index, hpa, direction, !realmode);
}

static int pnv_ioda1_tce_xchg(struct iommu_table *tbl, long index,
		unsigned long *hpa, enum dma_data_direction *direction)
{
	long ret = pnv_tce_xchg(tbl, index, hpa, direction, true);

	if (!ret)
		pnv_pci_p7ioc_tce_invalidate(tbl, index, 1, false);

	return ret;
}

static int pnv_ioda1_tce_xchg_rm(struct iommu_table *tbl, long index,
		unsigned long *hpa, enum dma_data_direction *direction)
{
	long ret = pnv_tce_xchg(tbl, index, hpa, direction, false);

	if (!ret)
		pnv_pci_p7ioc_tce_invalidate(tbl, index, 1, true);

	return ret;
}
#endif

static void pnv_ioda1_tce_free(struct iommu_table *tbl, long index,
		long npages)
{
	pnv_tce_free(tbl, index, npages);

	pnv_pci_p7ioc_tce_invalidate(tbl, index, npages, false);
}

static struct iommu_table_ops pnv_ioda1_iommu_ops = {
	.set = pnv_ioda1_tce_build,
#ifdef CONFIG_IOMMU_API
	.exchange = pnv_ioda1_tce_xchg,
	.exchange_rm = pnv_ioda1_tce_xchg_rm,
	.xchg_no_kill = pnv_ioda_tce_xchg_no_kill,
	.tce_kill = pnv_pci_p7ioc_tce_invalidate,
	.useraddrptr = pnv_tce_useraddrptr,
#endif
	.clear = pnv_ioda1_tce_free,
	.get = pnv_tce_get,
};

#define PHB3_TCE_KILL_INVAL_ALL		PPC_BIT(0)
#define PHB3_TCE_KILL_INVAL_PE		PPC_BIT(1)
#define PHB3_TCE_KILL_INVAL_ONE		PPC_BIT(2)

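/*
 * PHB3 TCE invalidation: kill the whole TCE cache, all entries for one
 * PE, or a range of entries, by writing to the "TCE kill" register.
 */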
static void pnv_pci_phb3_tce_invalidate_entire(struct pnv_phb *phb, bool rm)
{
	__be64 __iomem *invalidate = pnv_ioda_get_inval_reg(phb, rm);
	const unsigned long val = PHB3_TCE_KILL_INVAL_ALL;

	mb();
	if (rm)
		__raw_rm_writeq_be(val, invalidate);
	else
		__raw_writeq_be(val, invalidate);
}

static inline void pnv_pci_phb3_tce_invalidate_pe(struct pnv_ioda_pe *pe)
{
	__be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb, false);
	unsigned long val = PHB3_TCE_KILL_INVAL_PE | (pe->pe_number & 0xFF);

	mb();
	__raw_writeq_be(val, invalidate);
}

static void pnv_pci_phb3_tce_invalidate(struct pnv_ioda_pe *pe, bool rm,
					unsigned shift, unsigned long index,
					unsigned long npages)
{
	__be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb, rm);
	unsigned long start, end, inc;

	start = PHB3_TCE_KILL_INVAL_ONE;
	start |= (pe->pe_number & 0xFF);
	end = start;

	start |= (index << shift);
	end |= ((index + npages - 1) << shift);
	inc = (0x1ull << shift);
	mb();

	while (start <= end) {
		if (rm)
			__raw_rm_writeq_be(start, invalidate);
		else
			__raw_writeq_be(start, invalidate);
		start += inc;
	}
}

static inline void pnv_pci_ioda2_tce_invalidate_pe(struct pnv_ioda_pe *pe)
{
	struct pnv_phb *phb = pe->phb;

	if (phb->model == PNV_PHB_MODEL_PHB3 && phb->regs)
		pnv_pci_phb3_tce_invalidate_pe(pe);
	else
		opal_pci_tce_kill(phb->opal_id, OPAL_PCI_TCE_KILL_PE,
				  pe->pe_number, 0, 0, 0);
}

static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
		unsigned long index, unsigned long npages, bool rm)
{
	struct iommu_table_group_link *tgl;

	list_for_each_entry_lockless(tgl, &tbl->it_group_list, next) {
		struct pnv_ioda_pe *pe = container_of(tgl->table_group,
				struct pnv_ioda_pe, table_group);
		struct pnv_phb *phb = pe->phb;
		unsigned int shift = tbl->it_page_shift;

		if (phb->model == PNV_PHB_MODEL_NPU) {
			pnv_pci_phb3_tce_invalidate_entire(phb, rm);
			continue;
		}
		if (phb->model == PNV_PHB_MODEL_PHB3 && phb->regs)
			pnv_pci_phb3_tce_invalidate(pe, rm, shift,
						    index, npages);
		else
			opal_pci_tce_kill(phb->opal_id,
					  OPAL_PCI_TCE_KILL_PAGES,
					  pe->pe_number, 1u << shift,
					  index << shift, npages);
	}
}

void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm)
{
	if (phb->model == PNV_PHB_MODEL_NPU || phb->model == PNV_PHB_MODEL_PHB3)
		pnv_pci_phb3_tce_invalidate_entire(phb, rm);
	else
		opal_pci_tce_kill(phb->opal_id, OPAL_PCI_TCE_KILL, 0, 0, 0, 0);
}

static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index,
		long npages, unsigned long uaddr,
		enum dma_data_direction direction,
		unsigned long attrs)
{
	int ret = pnv_tce_build(tbl, index, npages, uaddr, direction,
			attrs);

	if (!ret)
		pnv_pci_ioda2_tce_invalidate(tbl, index, npages, false);

	return ret;
}

#ifdef CONFIG_IOMMU_API
static int pnv_ioda2_tce_xchg(struct iommu_table *tbl, long index,
		unsigned long *hpa, enum dma_data_direction *direction)
{
	long ret = pnv_tce_xchg(tbl, index, hpa, direction, true);

	if (!ret)
		pnv_pci_ioda2_tce_invalidate(tbl, index, 1, false);

	return ret;
}

static int pnv_ioda2_tce_xchg_rm(struct iommu_table *tbl, long index,
		unsigned long *hpa, enum dma_data_direction *direction)
{
	long ret = pnv_tce_xchg(tbl, index, hpa, direction, false);

	if (!ret)
		pnv_pci_ioda2_tce_invalidate(tbl, index, 1, true);

	return ret;
}
#endif

static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index,
		long npages)
{
	pnv_tce_free(tbl, index, npages);

	pnv_pci_ioda2_tce_invalidate(tbl, index, npages, false);
}

static void pnv_ioda2_table_free(struct iommu_table *tbl)
{
	pnv_pci_ioda2_table_free_pages(tbl);
}

static struct iommu_table_ops pnv_ioda2_iommu_ops = {
	.set = pnv_ioda2_tce_build,
#ifdef CONFIG_IOMMU_API
	.exchange = pnv_ioda2_tce_xchg,
	.exchange_rm = pnv_ioda2_tce_xchg_rm,
	.xchg_no_kill = pnv_ioda_tce_xchg_no_kill,
	.tce_kill = pnv_pci_ioda2_tce_invalidate,
	.useraddrptr = pnv_tce_useraddrptr,
#endif
	.clear = pnv_ioda2_tce_free,
	.get = pnv_tce_get,
	.free = pnv_ioda2_table_free,
};

static int pnv_pci_ioda_dev_dma_weight(struct pci_dev *dev, void *data)
{
	unsigned int *weight = (unsigned int *)data;

	if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
		return 0;

	if (dev->class == PCI_CLASS_SERIAL_USB_UHCI ||
	    dev->class == PCI_CLASS_SERIAL_USB_OHCI ||
	    dev->class == PCI_CLASS_SERIAL_USB_EHCI)
		*weight += 3;
	else if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID)
		*weight += 15;
	else
		*weight += 10;

	return 0;
}

static unsigned int pnv_pci_ioda_pe_dma_weight(struct pnv_ioda_pe *pe)
{
	unsigned int weight = 0;

#ifdef CONFIG_PCI_IOV
	if ((pe->flags & PNV_IODA_PE_VF) && pe->parent_dev) {
		pnv_pci_ioda_dev_dma_weight(pe->parent_dev, &weight);
		return weight;
	}
#endif

	if ((pe->flags & PNV_IODA_PE_DEV) && pe->pdev) {
		pnv_pci_ioda_dev_dma_weight(pe->pdev, &weight);
	} else if ((pe->flags & PNV_IODA_PE_BUS) && pe->pbus) {
		struct pci_dev *pdev;

		list_for_each_entry(pdev, &pe->pbus->devices, bus_list)
			pnv_pci_ioda_dev_dma_weight(pdev, &weight);
	} else if ((pe->flags & PNV_IODA_PE_BUS_ALL) && pe->pbus) {
		pci_walk_bus(pe->pbus, pnv_pci_ioda_dev_dma_weight, &weight);
	}

	return weight;
}

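/*
 * Carve out a share of the PHB's DMA32 segments for this PE in
 * proportion to its DMA weight, allocate a contiguous run of free
 * segments, back them with a 4K-page TCE table and wire it up.
 */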
2246static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb,
2247 struct pnv_ioda_pe *pe)
2248{
2249
2250 struct page *tce_mem = NULL;
2251 struct iommu_table *tbl;
2252 unsigned int weight, total_weight = 0;
2253 unsigned int tce32_segsz, base, segs, avail, i;
2254 int64_t rc;
2255 void *addr;
2256
2257
2258
2259
2260 weight = pnv_pci_ioda_pe_dma_weight(pe);
2261 if (!weight)
2262 return;
2263
2264 pci_walk_bus(phb->hose->bus, pnv_pci_ioda_dev_dma_weight,
2265 &total_weight);
2266 segs = (weight * phb->ioda.dma32_count) / total_weight;
2267 if (!segs)
2268 segs = 1;
2269
2270
2271
2272
2273
2274
2275
2276 do {
2277 for (base = 0; base <= phb->ioda.dma32_count - segs; base++) {
2278 for (avail = 0, i = base; i < base + segs; i++) {
2279 if (phb->ioda.dma32_segmap[i] ==
2280 IODA_INVALID_PE)
2281 avail++;
2282 }
2283
2284 if (avail == segs)
2285 goto found;
2286 }
2287 } while (--segs);
2288
2289 if (!segs) {
2290 pe_warn(pe, "No available DMA32 segments\n");
2291 return;
2292 }
2293
2294found:
2295 tbl = pnv_pci_table_alloc(phb->hose->node);
2296 if (WARN_ON(!tbl))
2297 return;
2298
2299 iommu_register_group(&pe->table_group, phb->hose->global_number,
2300 pe->pe_number);
2301 pnv_pci_link_table_and_group(phb->hose->node, 0, tbl, &pe->table_group);
2302
2303
2304 pe_info(pe, "DMA weight %d (%d), assigned (%d) %d DMA32 segments\n",
2305 weight, total_weight, base, segs);
2306 pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n",
2307 base * PNV_IODA1_DMA32_SEGSIZE,
2308 (base + segs) * PNV_IODA1_DMA32_SEGSIZE - 1);
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318 tce32_segsz = PNV_IODA1_DMA32_SEGSIZE >> (IOMMU_PAGE_SHIFT_4K - 3);
2319 tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL,
2320 get_order(tce32_segsz * segs));
2321 if (!tce_mem) {
2322 pe_err(pe, " Failed to allocate a 32-bit TCE memory\n");
2323 goto fail;
2324 }
2325 addr = page_address(tce_mem);
2326 memset(addr, 0, tce32_segsz * segs);
2327
2328
2329 for (i = 0; i < segs; i++) {
2330 rc = opal_pci_map_pe_dma_window(phb->opal_id,
2331 pe->pe_number,
2332 base + i, 1,
2333 __pa(addr) + tce32_segsz * i,
2334 tce32_segsz, IOMMU_PAGE_SIZE_4K);
2335 if (rc) {
2336 pe_err(pe, " Failed to configure 32-bit TCE table,"
2337 " err %ld\n", rc);
2338 goto fail;
2339 }
2340 }
2341
2342 /* Setup DMA32 segment mapping */
2343 for (i = base; i < base + segs; i++)
2344 phb->ioda.dma32_segmap[i] = pe->pe_number;
2345
2346 /* Setup linux iommu table */
2347 pnv_pci_setup_iommu_table(tbl, addr, tce32_segsz * segs,
2348 base * PNV_IODA1_DMA32_SEGSIZE,
2349 IOMMU_PAGE_SHIFT_4K);
2350
2351 tbl->it_ops = &pnv_ioda1_iommu_ops;
2352 pe->table_group.tce32_start = tbl->it_offset << tbl->it_page_shift;
2353 pe->table_group.tce32_size = tbl->it_size << tbl->it_page_shift;
2354 iommu_init_table(tbl, phb->hose->node);
2355
2356 if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
2357 pnv_ioda_setup_bus_dma(pe, pe->pbus);
2358
2359 return;
2360 fail:
2361 /* XXX Failure: Try deallocating the TCE table */
2362 if (tce_mem)
2363 __free_pages(tce_mem, get_order(tce32_segsz * segs));
2364 if (tbl) {
2365 pnv_pci_unlink_table_and_group(tbl, &pe->table_group);
2366 iommu_tce_table_put(tbl);
2367 }
2368}
2369
2370static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group,
2371 int num, struct iommu_table *tbl)
2372{
2373 struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
2374 table_group);
2375 struct pnv_phb *phb = pe->phb;
2376 int64_t rc;
2377 const unsigned long size = tbl->it_indirect_levels ?
2378 tbl->it_level_size : tbl->it_size;
2379 const __u64 start_addr = tbl->it_offset << tbl->it_page_shift;
2380 const __u64 win_size = tbl->it_size << tbl->it_page_shift;
2381
2382 pe_info(pe, "Setting up window#%d %llx..%llx pg=%lx\n", num,
2383 start_addr, start_addr + win_size - 1,
2384 IOMMU_PAGE_SIZE(tbl));
2385
2386 /*
2387  * Map TCE table through TVT. The TVE index is the PE number
2388  * shifted by 1 bit for 32-bits DMA space.
2389  */
2390 rc = opal_pci_map_pe_dma_window(phb->opal_id,
2391 pe->pe_number,
2392 (pe->pe_number << 1) + num,
2393 tbl->it_indirect_levels + 1,
2394 __pa(tbl->it_base),
2395 size << 3,
2396 IOMMU_PAGE_SIZE(tbl));
2397 if (rc) {
2398 pe_err(pe, "Failed to configure TCE table, err %ld\n", rc);
2399 return rc;
2400 }
2401
2402 pnv_pci_link_table_and_group(phb->hose->node, num,
2403 tbl, &pe->table_group);
2404 pnv_pci_ioda2_tce_invalidate_pe(pe);
2405
2406 return 0;
2407}
2408
2409void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable)
2410{
2411 uint16_t window_id = (pe->pe_number << 1) + 1;
2412 int64_t rc;
2413
2414 pe_info(pe, "%sabling 64-bit DMA bypass\n", enable ? "En" : "Dis");
2415 if (enable) {
2416 phys_addr_t top = memblock_end_of_DRAM();
2417
2418 top = roundup_pow_of_two(top);
2419 rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id,
2420 pe->pe_number,
2421 window_id,
2422 pe->tce_bypass_base,
2423 top);
2424 } else {
2425 rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id,
2426 pe->pe_number,
2427 window_id,
2428 pe->tce_bypass_base,
2429 0);
2430 }
2431 if (rc)
2432 pe_err(pe, "OPAL error %lld configuring bypass window\n", rc);
2433 else
2434 pe->tce_bypass_enabled = enable;
2435}
2436
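/*
 * Allocate and initialize a TCE table for one IODA2 DMA window. @num
 * selects the 32-bit (0) or 64-bit bypass-offset (1) window, which
 * determines the bus offset the table starts at.
 */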
2437static long pnv_pci_ioda2_create_table(struct iommu_table_group *table_group,
2438 int num, __u32 page_shift, __u64 window_size, __u32 levels,
2439 bool alloc_userspace_copy, struct iommu_table **ptbl)
2440{
2441 struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
2442 table_group);
2443 int nid = pe->phb->hose->node;
2444 __u64 bus_offset = num ? pe->tce_bypass_base : table_group->tce32_start;
2445 long ret;
2446 struct iommu_table *tbl;
2447
2448 tbl = pnv_pci_table_alloc(nid);
2449 if (!tbl)
2450 return -ENOMEM;
2451
2452 tbl->it_ops = &pnv_ioda2_iommu_ops;
2453
2454 ret = pnv_pci_ioda2_table_alloc_pages(nid,
2455 bus_offset, page_shift, window_size,
2456 levels, alloc_userspace_copy, tbl);
2457 if (ret) {
2458 iommu_tce_table_put(tbl);
2459 return ret;
2460 }
2461
2462 *ptbl = tbl;
2463
2464 return 0;
2465}
2466
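/* Create the default 32-bit DMA window and, if permitted, enable bypass */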
2467static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe)
2468{
2469 struct iommu_table *tbl = NULL;
2470 long rc;
2471
2472 /*
2473  * crashkernel= specifies the kdump kernel's maximum memory at
2474  * some offset and there is no guarantee the result is a power
2475  * of 2, which will cause errors later.
2476  */
2477 const u64 max_memory = __rounddown_pow_of_two(memory_hotplug_max());
2478
2479 /*
2480  * In memory constrained environments, e.g. kdump kernel, the
2481  * DMA window can be larger than available memory, which will
2482  * cause errors later.
2483  */
2484 const u64 window_size = min((u64)pe->table_group.tce32_size, max_memory);
2485
2486 rc = pnv_pci_ioda2_create_table(&pe->table_group, 0,
2487 IOMMU_PAGE_SHIFT_4K,
2488 window_size,
2489 POWERNV_IOMMU_DEFAULT_LEVELS, false, &tbl);
2490 if (rc) {
2491 pe_err(pe, "Failed to create 32-bit TCE table, err %ld\n",
2492 rc);
2493 return rc;
2494 }
2495
2496 iommu_init_table(tbl, pe->phb->hose->node);
2497
2498 rc = pnv_pci_ioda2_set_window(&pe->table_group, 0, tbl);
2499 if (rc) {
2500 pe_err(pe, "Failed to configure 32-bit TCE table, err %ld\n",
2501 rc);
2502 iommu_tce_table_put(tbl);
2503 return rc;
2504 }
2505
2506 if (!pnv_iommu_bypass_disabled)
2507 pnv_pci_ioda2_set_bypass(pe, true);
2508
2509 return 0;
2510}
2511
2512#if defined(CONFIG_IOMMU_API) || defined(CONFIG_PCI_IOV)
2513static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group,
2514 int num)
2515{
2516 struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
2517 table_group);
2518 struct pnv_phb *phb = pe->phb;
2519 long ret;
2520
2521 pe_info(pe, "Removing DMA window #%d\n", num);
2522
2523 ret = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number,
2524 (pe->pe_number << 1) + num,
2525 0, 0,
2526 0, 0);
2527 if (ret)
2528 pe_warn(pe, "Unmapping failed, ret = %ld\n", ret);
2529 else
2530 pnv_pci_ioda2_tce_invalidate_pe(pe);
2531
2532 pnv_pci_unlink_table_and_group(table_group->tables[num], table_group);
2533
2534 return ret;
2535}
2536#endif
2537
2538#ifdef CONFIG_IOMMU_API
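/*
 * Worst-case memory footprint of a multi-level TCE table: one aligned
 * table per level, doubled to account for the userspace view that may
 * be allocated alongside it (hence the final "bytes + bytes").
 */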
2539unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
2540 __u64 window_size, __u32 levels)
2541{
2542 unsigned long bytes = 0;
2543 const unsigned window_shift = ilog2(window_size);
2544 unsigned entries_shift = window_shift - page_shift;
2545 unsigned table_shift = entries_shift + 3;
2546 unsigned long tce_table_size = max(0x1000UL, 1UL << table_shift);
2547 unsigned long direct_table_size;
2548
2549 if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS) ||
2550 !is_power_of_2(window_size))
2551 return 0;
2552
2553 /* Calculate a direct table size from window_size and levels */
2554 entries_shift = (entries_shift + levels - 1) / levels;
2555 table_shift = entries_shift + 3;
2556 table_shift = max_t(unsigned, table_shift, PAGE_SHIFT);
2557 direct_table_size = 1UL << table_shift;
2558
2559 for ( ; levels; --levels) {
2560 bytes += ALIGN(tce_table_size, direct_table_size);
2561
2562 tce_table_size /= direct_table_size;
2563 tce_table_size <<= 3;
2564 tce_table_size = max_t(unsigned long,
2565 tce_table_size, direct_table_size);
2566 }
2567
2568 return bytes + bytes; /* one for HW table, one for userspace copy */
2569}
2570
2571static long pnv_pci_ioda2_create_table_userspace(
2572 struct iommu_table_group *table_group,
2573 int num, __u32 page_shift, __u64 window_size, __u32 levels,
2574 struct iommu_table **ptbl)
2575{
2576 long ret = pnv_pci_ioda2_create_table(table_group,
2577 num, page_shift, window_size, levels, true, ptbl);
2578
2579 if (!ret)
2580 (*ptbl)->it_allocated_size = pnv_pci_ioda2_get_table_size(
2581 page_shift, window_size, levels);
2582 return ret;
2583}
2584
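/*
 * Called when an external user such as VFIO takes control of the PE:
 * disable 64-bit bypass and tear down the default 32-bit window.
 */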
2585static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group)
2586{
2587 struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
2588 table_group);
2589
2590 struct iommu_table *tbl = pe->table_group.tables[0];
2591
2592 pnv_pci_ioda2_set_bypass(pe, false);
2593 pnv_pci_ioda2_unset_window(&pe->table_group, 0);
2594 if (pe->pbus)
2595 pnv_ioda_setup_bus_dma(pe, pe->pbus);
2596 iommu_tce_table_put(tbl);
2597}
2598
2599static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group)
2600{
2601 struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
2602 table_group);
2603
2604 pnv_pci_ioda2_setup_default_config(pe);
2605 if (pe->pbus)
2606 pnv_ioda_setup_bus_dma(pe, pe->pbus);
2607}
2608
2609static struct iommu_table_group_ops pnv_pci_ioda2_ops = {
2610 .get_table_size = pnv_pci_ioda2_get_table_size,
2611 .create_table = pnv_pci_ioda2_create_table_userspace,
2612 .set_window = pnv_pci_ioda2_set_window,
2613 .unset_window = pnv_pci_ioda2_unset_window,
2614 .take_ownership = pnv_ioda2_take_ownership,
2615 .release_ownership = pnv_ioda2_release_ownership,
2616};
2617
2618static void pnv_ioda_setup_bus_iommu_group_add_devices(struct pnv_ioda_pe *pe,
2619 struct iommu_table_group *table_group,
2620 struct pci_bus *bus)
2621{
2622 struct pci_dev *dev;
2623
2624 list_for_each_entry(dev, &bus->devices, bus_list) {
2625 iommu_add_device(table_group, &dev->dev);
2626
2627 if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
2628 pnv_ioda_setup_bus_iommu_group_add_devices(pe,
2629 table_group, dev->subordinate);
2630 }
2631}
2632
2633static void pnv_ioda_setup_bus_iommu_group(struct pnv_ioda_pe *pe,
2634 struct iommu_table_group *table_group, struct pci_bus *bus)
2635{
2636
2637 if (pe->flags & PNV_IODA_PE_DEV)
2638 iommu_add_device(table_group, &pe->pdev->dev);
2639
2640 if ((pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) || bus)
2641 pnv_ioda_setup_bus_iommu_group_add_devices(pe, table_group,
2642 bus);
2643}
2644
2645static unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb);
2646
2647static void pnv_pci_ioda_setup_iommu_api(void)
2648{
2649 struct pci_controller *hose;
2650 struct pnv_phb *phb;
2651 struct pnv_ioda_pe *pe;
2652
2653 /*
2654  * There are 4 types of PEs:
2655  * - PNV_IODA_PE_BUS: a downstream port with an adapter,
2656  *   created from pnv_pci_setup_bridge();
2657  * - PNV_IODA_PE_BUS_ALL: a PCI-PCIX bridge with devices behind it,
2658  *   created from pnv_pci_setup_bridge();
2659  * - PNV_IODA_PE_VF: a SRIOV virtual function,
2660  *   created from pnv_pcibios_sriov_enable();
2661  * - PNV_IODA_PE_DEV: an NPU or OCAPI device,
2662  *   created from pnv_ioda_setup_dev_PE().
2663  *
2664  * Normally a PE is represented by an IOMMU group, however for
2665  * devices with side channels the groups need to be more strict.
2666  */
2667 list_for_each_entry(hose, &hose_list, list_node) {
2668 phb = hose->private_data;
2669
2670 if (phb->type == PNV_PHB_NPU_NVLINK ||
2671 phb->type == PNV_PHB_NPU_OCAPI)
2672 continue;
2673
2674 list_for_each_entry(pe, &phb->ioda.pe_list, list) {
2675 struct iommu_table_group *table_group;
2676
2677 table_group = pnv_try_setup_npu_table_group(pe);
2678 if (!table_group) {
2679 if (!pnv_pci_ioda_pe_dma_weight(pe))
2680 continue;
2681
2682 table_group = &pe->table_group;
2683 iommu_register_group(&pe->table_group,
2684 pe->phb->hose->global_number,
2685 pe->pe_number);
2686 }
2687 pnv_ioda_setup_bus_iommu_group(pe, table_group,
2688 pe->pbus);
2689 }
2690 }
2691
2692 /*
2693  * Now we have all PHBs discovered, time to add NPU devices to
2694  * the corresponding IOMMU groups.
2695  */
2696 list_for_each_entry(hose, &hose_list, list_node) {
2697 unsigned long pgsizes;
2698
2699 phb = hose->private_data;
2700
2701 if (phb->type != PNV_PHB_NPU_NVLINK)
2702 continue;
2703
2704 pgsizes = pnv_ioda_parse_tce_sizes(phb);
2705 list_for_each_entry(pe, &phb->ioda.pe_list, list) {
2706 /*
2707  * IODA2 bridges get this set up from pnv_pci_ioda2_setup_dma_pe,
2708  * however NVLink bridges do not have this hook defined so we
2709  * do it here.
2710  */
2711 pe->table_group.pgsizes = pgsizes;
2712 pnv_npu_compound_attach(pe);
2713 }
2714 }
2715}
2716#else
2717static void pnv_pci_ioda_setup_iommu_api(void) { }
2718#endif
2719
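/*
 * Build a bitmask of the TCE page sizes supported by the PHB from the
 * "ibm,supported-tce-sizes" device-tree property, with a conservative
 * fallback when the property is absent.
 */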
2720static unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb)
2721{
2722 struct pci_controller *hose = phb->hose;
2723 struct device_node *dn = hose->dn;
2724 unsigned long mask = 0;
2725 int i, rc, count;
2726 u32 val;
2727
2728 count = of_property_count_u32_elems(dn, "ibm,supported-tce-sizes");
2729 if (count <= 0) {
2730 mask = SZ_4K | SZ_64K;
2731 /* Add 16M for POWER8 by default */
2732 if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
2733 !cpu_has_feature(CPU_FTR_ARCH_300))
2734 mask |= SZ_16M;
2735 return mask;
2736 }
2737
2738 for (i = 0; i < count; i++) {
2739 rc = of_property_read_u32_index(dn, "ibm,supported-tce-sizes",
2740 i, &val);
2741 if (rc == 0)
2742 mask |= 1ULL << val;
2743 }
2744
2745 return mask;
2746}
2747
2748static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
2749 struct pnv_ioda_pe *pe)
2750{
2751 int64_t rc;
2752
2753 if (!pnv_pci_ioda_pe_dma_weight(pe))
2754 return;
2755
2756 /* TVE #1 is selected by PCI address bit 59 */
2757 pe->tce_bypass_base = 1ull << 59;
2758
2759 /* The PE will reserve all possible 32-bits space */
2760 pe_info(pe, "Setting up 32-bit TCE table at 0..%08x\n",
2761 phb->ioda.m32_pci_base);
2762
2763 /* Setup linux iommu table */
2764 pe->table_group.tce32_start = 0;
2765 pe->table_group.tce32_size = phb->ioda.m32_pci_base;
2766 pe->table_group.max_dynamic_windows_supported =
2767 IOMMU_TABLE_GROUP_MAX_TABLES;
2768 pe->table_group.max_levels = POWERNV_IOMMU_MAX_LEVELS;
2769 pe->table_group.pgsizes = pnv_ioda_parse_tce_sizes(phb);
2770#ifdef CONFIG_IOMMU_API
2771 pe->table_group.ops = &pnv_pci_ioda2_ops;
2772#endif
2773
2774 rc = pnv_pci_ioda2_setup_default_config(pe);
2775 if (rc)
2776 return;
2777
2778 if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
2779 pnv_ioda_setup_bus_dma(pe, pe->pbus);
2780}
2781
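/* Have OPAL perform the EOI for an MSI; used by the PHB3 MSI irq_chip */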
2782int64_t pnv_opal_pci_msi_eoi(struct irq_chip *chip, unsigned int hw_irq)
2783{
2784 struct pnv_phb *phb = container_of(chip, struct pnv_phb,
2785 ioda.irq_chip);
2786
2787 return opal_pci_msi_eoi(phb->opal_id, hw_irq);
2788}
2789
2790static void pnv_ioda2_msi_eoi(struct irq_data *d)
2791{
2792 int64_t rc;
2793 unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
2794 struct irq_chip *chip = irq_data_get_irq_chip(d);
2795
2796 rc = pnv_opal_pci_msi_eoi(chip, hw_irq);
2797 WARN_ON_ONCE(rc);
2798
2799 icp_native_eoi(d);
2800}
2801
2802
2803void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq)
2804{
2805 struct irq_data *idata;
2806 struct irq_chip *ichip;
2807
2808 /* The MSI EOI OPAL call is only needed on PHB3 */
2809 if (phb->model != PNV_PHB_MODEL_PHB3)
2810 return;
2811
2812 if (!phb->ioda.irq_chip_init) {
2813 /*
2814  * First time we setup an MSI IRQ, we need to setup the
2815  * corresponding IRQ chip to route correctly.
2816  */
2817 idata = irq_get_irq_data(virq);
2818 ichip = irq_data_get_irq_chip(idata);
2819 phb->ioda.irq_chip_init = 1;
2820 phb->ioda.irq_chip = *ichip;
2821 phb->ioda.irq_chip.irq_eoi = pnv_ioda2_msi_eoi;
2822 }
2823 irq_set_chip(virq, &phb->ioda.irq_chip);
2824}
2825
2826/*
2827 * Returns true iff chip is something that we could call
2828 * pnv_opal_pci_msi_eoi() for.
2829 */
2830bool is_pnv_opal_msi(struct irq_chip *chip)
2831{
2832 return chip->irq_eoi == pnv_ioda2_msi_eoi;
2833}
2834EXPORT_SYMBOL_GPL(is_pnv_opal_msi);
2835
2836static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
2837 unsigned int hwirq, unsigned int virq,
2838 unsigned int is_64, struct msi_msg *msg)
2839{
2840 struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev);
2841 unsigned int xive_num = hwirq - phb->msi_base;
2842 __be32 data;
2843 int rc;
2844
2845 /* No PE assigned ? bail out ... no MSI for you ! */
2846 if (pe == NULL)
2847 return -ENXIO;
2848
2849 /* Check if we have an MVE */
2850 if (pe->mve_number < 0)
2851 return -ENXIO;
2852
2853 /* Force 32-bit MSI on some broken devices */
2854 if (dev->no_64bit_msi)
2855 is_64 = 0;
2856
2857 /* Assign XIVE to PE */
2858 rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num);
2859 if (rc) {
2860 pr_warn("%s: OPAL error %d setting XIVE %d PE\n",
2861 pci_name(dev), rc, xive_num);
2862 return -EIO;
2863 }
2864
2865 if (is_64) {
2866 __be64 addr64;
2867
2868 rc = opal_get_msi_64(phb->opal_id, pe->mve_number, xive_num, 1,
2869 &addr64, &data);
2870 if (rc) {
2871 pr_warn("%s: OPAL error %d getting 64-bit MSI data\n",
2872 pci_name(dev), rc);
2873 return -EIO;
2874 }
2875 msg->address_hi = be64_to_cpu(addr64) >> 32;
2876 msg->address_lo = be64_to_cpu(addr64) & 0xfffffffful;
2877 } else {
2878 __be32 addr32;
2879
2880 rc = opal_get_msi_32(phb->opal_id, pe->mve_number, xive_num, 1,
2881 &addr32, &data);
2882 if (rc) {
2883 pr_warn("%s: OPAL error %d getting 32-bit MSI data\n",
2884 pci_name(dev), rc);
2885 return -EIO;
2886 }
2887 msg->address_hi = 0;
2888 msg->address_lo = be32_to_cpu(addr32);
2889 }
2890 msg->data = be32_to_cpu(data);
2891
2892 pnv_set_msi_irq_chip(phb, virq);
2893
2894 pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d),"
2895 " address=%x_%08x data=%x PE# %x\n",
2896 pci_name(dev), is_64 ? "64" : "32", hwirq, xive_num,
2897 msg->address_hi, msg->address_lo, data, pe->pe_number);
2898
2899 return 0;
2900}
2901
2902static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
2903{
2904 unsigned int count;
2905 const __be32 *prop = of_get_property(phb->hose->dn,
2906 "ibm,opal-msi-ranges", NULL);
2907 if (!prop) {
2908 /* BML fallback: try the legacy "msi-ranges" property */
2909 prop = of_get_property(phb->hose->dn, "msi-ranges", NULL);
2910 }
2911 if (!prop)
2912 return;
2913
2914 phb->msi_base = be32_to_cpup(prop);
2915 count = be32_to_cpup(prop + 1);
2916 if (msi_bitmap_alloc(&phb->msi_bmp, count, phb->hose->dn)) {
2917 pr_err("PCI %d: Failed to allocate MSI bitmap !\n",
2918 phb->hose->global_number);
2919 return;
2920 }
2921
2922 phb->msi_setup = pnv_pci_ioda_msi_setup;
2923 phb->msi32_support = 1;
2924 pr_info(" Allocated bitmap for %d MSIs (base IRQ 0x%x)\n",
2925 count, phb->msi_base);
2926}
2927
2928#ifdef CONFIG_PCI_IOV
2929static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
2930{
2931 struct pci_controller *hose = pci_bus_to_host(pdev->bus);
2932 struct pnv_phb *phb = hose->private_data;
2933 const resource_size_t gate = phb->ioda.m64_segsize >> 2;
2934 struct resource *res;
2935 int i;
2936 resource_size_t size, total_vf_bar_sz;
2937 struct pci_dn *pdn;
2938 int mul, total_vfs;
2939
2940 if (!pdev->is_physfn || pci_dev_is_added(pdev))
2941 return;
2942
2943 pdn = pci_get_pdn(pdev);
2944 pdn->vfs_expanded = 0;
2945 pdn->m64_single_mode = false;
2946
2947 total_vfs = pci_sriov_get_totalvfs(pdev);
2948 mul = phb->ioda.total_pe_num;
2949 total_vf_bar_sz = 0;
2950
2951 for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
2952 res = &pdev->resource[i + PCI_IOV_RESOURCES];
2953 if (!res->flags || res->parent)
2954 continue;
2955 if (!pnv_pci_is_m64_flags(res->flags)) {
2956 dev_warn(&pdev->dev, "Don't support SR-IOV with"
2957 " non M64 VF BAR%d: %pR. \n",
2958 i, res);
2959 goto truncate_iov;
2960 }
2961
2962 total_vf_bar_sz += pci_iov_resource_size(pdev,
2963 i + PCI_IOV_RESOURCES);
2964
2965 /*
2966  * If bigger than quarter of M64 segment size, just round up
2967  * power of two.
2968  *
2969  * Generally, one M64 BAR maps one IOV BAR. To avoid conflict
2970  * with other devices, IOV BAR size is expanded to be
2971  * (total_pe * VF_BAR_size).  When VF_BAR_size is half of M64
2972  * segment size, the expanded size would equal half of the
2973  * whole M64 space size, which will exhaust the M64 space and
2974  * limit the system flexibility.  This is a design decision to
2975  * set the boundary to quarter of the M64 segment size.
2976  */
2977 if (total_vf_bar_sz > gate) {
2978 mul = roundup_pow_of_two(total_vfs);
2979 dev_info(&pdev->dev,
2980 "VF BAR Total IOV size %llx > %llx, roundup to %d VFs\n",
2981 total_vf_bar_sz, gate, mul);
2982 pdn->m64_single_mode = true;
2983 break;
2984 }
2985 }
2986
2987 for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
2988 res = &pdev->resource[i + PCI_IOV_RESOURCES];
2989 if (!res->flags || res->parent)
2990 continue;
2991
2992 size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
2993
2994 /* On PHB3, the minimum size alignment of an M64 BAR in
2995  * single mode is 32MB.
2996  */
2997 if (pdn->m64_single_mode && (size < SZ_32M))
2998 goto truncate_iov;
2999 dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res);
3000 res->end = res->start + size * mul - 1;
3001 dev_dbg(&pdev->dev, " %pR\n", res);
3002 dev_info(&pdev->dev, "VF BAR%d: %pR (expanded to %d VFs for PE alignment)\n",
3003 i, res, mul);
3004 }
3005 pdn->vfs_expanded = mul;
3006
3007 return;
3008
3009truncate_iov:
3010 /* To save MMIO space, IOV BAR is truncated. */
3011 for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
3012 res = &pdev->resource[i + PCI_IOV_RESOURCES];
3013 res->flags = 0;
3014 res->end = res->start - 1;
3015 }
3016}
3017#endif
3018
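/*
 * Map every IO or M32 segment spanned by @res to the PE, both in the
 * PHB's segment table and through OPAL.
 */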
3019static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe,
3020 struct resource *res)
3021{
3022 struct pnv_phb *phb = pe->phb;
3023 struct pci_bus_region region;
3024 int index;
3025 int64_t rc;
3026
3027 if (!res || !res->flags || res->start > res->end)
3028 return;
3029
3030 if (res->flags & IORESOURCE_IO) {
3031 region.start = res->start - phb->ioda.io_pci_base;
3032 region.end = res->end - phb->ioda.io_pci_base;
3033 index = region.start / phb->ioda.io_segsize;
3034
3035 while (index < phb->ioda.total_pe_num &&
3036 region.start <= region.end) {
3037 phb->ioda.io_segmap[index] = pe->pe_number;
3038 rc = opal_pci_map_pe_mmio_window(phb->opal_id,
3039 pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index);
3040 if (rc != OPAL_SUCCESS) {
3041 pr_err("%s: Error %lld mapping IO segment#%d to PE#%x\n",
3042 __func__, rc, index, pe->pe_number);
3043 break;
3044 }
3045
3046 region.start += phb->ioda.io_segsize;
3047 index++;
3048 }
3049 } else if ((res->flags & IORESOURCE_MEM) &&
3050 !pnv_pci_is_m64(phb, res)) {
3051 region.start = res->start -
3052 phb->hose->mem_offset[0] -
3053 phb->ioda.m32_pci_base;
3054 region.end = res->end -
3055 phb->hose->mem_offset[0] -
3056 phb->ioda.m32_pci_base;
3057 index = region.start / phb->ioda.m32_segsize;
3058
3059 while (index < phb->ioda.total_pe_num &&
3060 region.start <= region.end) {
3061 phb->ioda.m32_segmap[index] = pe->pe_number;
3062 rc = opal_pci_map_pe_mmio_window(phb->opal_id,
3063 pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index);
3064 if (rc != OPAL_SUCCESS) {
3065 pr_err("%s: Error %lld mapping M32 segment#%d to PE#%x",
3066 __func__, rc, index, pe->pe_number);
3067 break;
3068 }
3069
3070 region.start += phb->ioda.m32_segsize;
3071 index++;
3072 }
3073 }
3074}
3075
3076/*
3077 * This function is supposed to be called on basis of PE from top
3078 * to bottom style. So the I/O or MMIO segment assigned to
3079 * parent PE could be overridden by its child PEs if necessary.
3080 */
3081static void pnv_ioda_setup_pe_seg(struct pnv_ioda_pe *pe)
3082{
3083 struct pci_dev *pdev;
3084 int i;
3085
3086
3087 /* NOTE: We only care about PCI-bus based PEs for now.
3088  * PCI device based PEs, for example SR-IOV sensitive VFs,
3089  * should be figured out later.
3090  */
3091 BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));
3092
3093 list_for_each_entry(pdev, &pe->pbus->devices, bus_list) {
3094 for (i = 0; i <= PCI_ROM_RESOURCE; i++)
3095 pnv_ioda_setup_pe_res(pe, &pdev->resource[i]);
3096
3097 /*
3098  * If the PE contains all subordinate PCI buses, the
3099  * windows of the child bridges should be mapped to
3100  * the PE as well.
3101  */
3102 if (!(pe->flags & PNV_IODA_PE_BUS_ALL) || !pci_is_bridge(pdev))
3103 continue;
3104 for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++)
3105 pnv_ioda_setup_pe_res(pe,
3106 &pdev->resource[PCI_BRIDGE_RESOURCES + i]);
3107 }
3108}
3109
3110#ifdef CONFIG_DEBUG_FS
3111static int pnv_pci_diag_data_set(void *data, u64 val)
3112{
3113 struct pnv_phb *phb = data;
3114 s64 ret;
3115
3116 /* Retrieve the diag data from firmware */
3117 ret = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag_data,
3118 phb->diag_data_size);
3119 if (ret != OPAL_SUCCESS)
3120 return -EIO;
3121
3122 /* Print the diag data to the kernel log */
3123 pnv_pci_dump_phb_diag_data(phb->hose, phb->diag_data);
3124 return 0;
3125}
3126
3127DEFINE_DEBUGFS_ATTRIBUTE(pnv_pci_diag_data_fops, NULL, pnv_pci_diag_data_set,
3128 "%llu\n");
3129
3130static int pnv_pci_ioda_pe_dump(void *data, u64 val)
3131{
3132 struct pnv_phb *phb = data;
3133 int pe_num;
3134
3135 for (pe_num = 0; pe_num < phb->ioda.total_pe_num; pe_num++) {
3136 struct pnv_ioda_pe *pe = &phb->ioda.pe_array[pe_num];
3137
3138 if (!test_bit(pe_num, phb->ioda.pe_alloc))
3139 continue;
3140
3141 pe_warn(pe, "rid: %04x dev count: %2d flags: %s%s%s%s%s%s\n",
3142 pe->rid, pe->device_count,
3143 (pe->flags & PNV_IODA_PE_DEV) ? "dev " : "",
3144 (pe->flags & PNV_IODA_PE_BUS) ? "bus " : "",
3145 (pe->flags & PNV_IODA_PE_BUS_ALL) ? "all " : "",
3146 (pe->flags & PNV_IODA_PE_MASTER) ? "master " : "",
3147 (pe->flags & PNV_IODA_PE_SLAVE) ? "slave " : "",
3148 (pe->flags & PNV_IODA_PE_VF) ? "vf " : "");
3149 }
3150
3151 return 0;
3152}
3153
3154DEFINE_DEBUGFS_ATTRIBUTE(pnv_pci_ioda_pe_dump_fops, NULL,
3155 pnv_pci_ioda_pe_dump, "%llu\n");
3156
3157#endif
3158
3159static void pnv_pci_ioda_create_dbgfs(void)
3160{
3161#ifdef CONFIG_DEBUG_FS
3162 struct pci_controller *hose, *tmp;
3163 struct pnv_phb *phb;
3164 char name[16];
3165
3166 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
3167 phb = hose->private_data;
3168
3169 /* Notify that initialization of the PHB is done */
3170 phb->initialized = 1;
3171
3172 sprintf(name, "PCI%04x", hose->global_number);
3173 phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root);
3174 if (!phb->dbgfs) {
3175 pr_warn("%s: Error on creating debugfs on PHB#%x\n",
3176 __func__, hose->global_number);
3177 continue;
3178 }
3179
3180 debugfs_create_file_unsafe("dump_diag_regs", 0200, phb->dbgfs,
3181 phb, &pnv_pci_diag_data_fops);
3182 debugfs_create_file_unsafe("dump_ioda_pe_state", 0200, phb->dbgfs,
3183 phb, &pnv_pci_ioda_pe_dump_fops);
3184 }
3185#endif
3186}
3187
3188static void pnv_pci_enable_bridge(struct pci_bus *bus)
3189{
3190 struct pci_dev *dev = bus->self;
3191 struct pci_bus *child;
3192
3193 /* Empty bus ? bail */
3194 if (list_empty(&bus->devices))
3195 return;
3196
3197 /*
3198  * If there's a bridge associated with that bus enable it. This works
3199  * around races in the generic code if the enabling is done during
3200  * parallel probing. This can be removed once those races have been
3201  * fixed.
3202  */
3203 if (dev) {
3204 int rc = pci_enable_device(dev);
3205 if (rc)
3206 pci_err(dev, "Error enabling bridge (%d)\n", rc);
3207 pci_set_master(dev);
3208 }
3209
3210 /* Perform the same to child buses */
3211 list_for_each_entry(child, &bus->children, node)
3212 pnv_pci_enable_bridge(child);
3213}
3214
3215static void pnv_pci_enable_bridges(void)
3216{
3217 struct pci_controller *hose;
3218
3219 list_for_each_entry(hose, &hose_list, list_node)
3220 pnv_pci_enable_bridge(hose->bus);
3221}
3222
3223static void pnv_pci_ioda_fixup(void)
3224{
3225 pnv_pci_ioda_setup_PEs();
3226 pnv_pci_ioda_setup_iommu_api();
3227 pnv_pci_ioda_create_dbgfs();
3228
3229 pnv_pci_enable_bridges();
3230
3231#ifdef CONFIG_EEH
3232 pnv_eeh_post_init();
3233#endif
3234}
3235
3236/*
3237 * Returns the alignment for I/O or memory windows for P2P
3238 * bridges. That actually depends on how PEs are segmented.
3239 * For now, we return I/O or M32 segment size for PE sensitive
3240 * P2P bridges. Otherwise, the default values (4KiB for I/O,
3241 * 1MiB for memory) will be returned.
3242 *
3243 * The current PCI bus might be put into one PE, which was
3244 * created against the parent PCI bridge. For that case, we
3245 * needn't enlarge the alignment so that we can save some
3246 * resources later.
3247 */
3248static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus,
3249 unsigned long type)
3250{
3251 struct pci_dev *bridge;
3252 struct pci_controller *hose = pci_bus_to_host(bus);
3253 struct pnv_phb *phb = hose->private_data;
3254 int num_pci_bridges = 0;
3255
3256 bridge = bus->self;
3257 while (bridge) {
3258 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE) {
3259 num_pci_bridges++;
3260 if (num_pci_bridges >= 2)
3261 return 1;
3262 }
3263
3264 bridge = bridge->bus->self;
3265 }
3266
3267 /*
3268  * We fall back to M32 if M64 isn't supported. We enforce the M64
3269  * alignment for any 64-bit resource, PCIe doesn't care and
3270  * bridges only do 64-bit prefetchable anyway.
3271  */
3272 if (phb->ioda.m64_segsize && pnv_pci_is_m64_flags(type))
3273 return phb->ioda.m64_segsize;
3274 if (type & IORESOURCE_MEM)
3275 return phb->ioda.m32_segsize;
3276
3277 return phb->ioda.io_segsize;
3278}
3279
3280/*
3281 * We are updating the root port or the upstream port of the
3282 * bridge behind the root port with the PHB's windows in order
3283 * to accommodate the changes on required resources during
3284 * PCI (slot) hotplug, which is connected to either the root
3285 * port or the downstream ports of a PCIe switch behind the
3286 * root port.
3287 */
3288static void pnv_pci_fixup_bridge_resources(struct pci_bus *bus,
3289 unsigned long type)
3290{
3291 struct pci_controller *hose = pci_bus_to_host(bus);
3292 struct pnv_phb *phb = hose->private_data;
3293 struct pci_dev *bridge = bus->self;
3294 struct resource *r, *w;
3295 bool msi_region = false;
3296 int i;
3297
3298 /* Check if we need to apply the fixup to the bridge's windows */
3299 if (!pci_is_root_bus(bridge->bus) &&
3300 !pci_is_root_bus(bridge->bus->self->bus))
3301 return;
3302
3303 /* Fixup the resources */
3304 for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) {
3305 r = &bridge->resource[PCI_BRIDGE_RESOURCES + i];
3306 if (!r->flags || !r->parent)
3307 continue;
3308
3309 w = NULL;
3310 if (r->flags & type & IORESOURCE_IO)
3311 w = &hose->io_resource;
3312 else if (pnv_pci_is_m64(phb, r) &&
3313 (type & IORESOURCE_PREFETCH) &&
3314 phb->ioda.m64_segsize)
3315 w = &hose->mem_resources[1];
3316 else if (r->flags & type & IORESOURCE_MEM) {
3317 w = &hose->mem_resources[0];
3318 msi_region = true;
3319 }
3320
3321 r->start = w->start;
3322 r->end = w->end;
3323
3324 /* The 64KB 32-bit MSI region shouldn't be included in
3325  * the 32-bit bridge window. Otherwise, we can see strange
3326  * issues. One of them is an EEH error observed on Garrison.
3327  *
3328  * Exclude the top 1MB region, which is the minimal alignment
3329  * of the 32-bit bridge window.
3330  */
3331 if (msi_region) {
3332 r->end += 0x10000;
3333 r->end -= 0x100000;
3334 }
3335 }
3336}
3337
3338static void pnv_pci_setup_bridge(struct pci_bus *bus, unsigned long type)
3339{
3340 struct pci_controller *hose = pci_bus_to_host(bus);
3341 struct pnv_phb *phb = hose->private_data;
3342 struct pci_dev *bridge = bus->self;
3343 struct pnv_ioda_pe *pe;
3344 bool all = (pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE);
3345
3346 /* Extend bridge's windows if necessary */
3347 pnv_pci_fixup_bridge_resources(bus, type);
3348
3349 /* The PE for the root bus should be realized before any others */
3350 if (!phb->ioda.root_pe_populated) {
3351 pe = pnv_ioda_setup_bus_PE(phb->hose->bus, false);
3352 if (pe) {
3353 phb->ioda.root_pe_idx = pe->pe_number;
3354 phb->ioda.root_pe_populated = true;
3355 }
3356 }
3357
3358 /* Don't assign a PE to a PCI bus without subordinate devices */
3359 if (list_empty(&bus->devices))
3360 return;
3361
3362 /* Reserve PEs according to used M64 resources */
3363 if (phb->reserve_m64_pe)
3364 phb->reserve_m64_pe(bus, NULL, all);
3365
3366 /*
3367  * Assign PE. We might run here because of partial hotplug.
3368  * For the case, we just pick up the existing PE and should
3369  * not allocate resources again.
3370  */
3371 pe = pnv_ioda_setup_bus_PE(bus, all);
3372 if (!pe)
3373 return;
3374
3375 pnv_ioda_setup_pe_seg(pe);
3376 switch (phb->type) {
3377 case PNV_PHB_IODA1:
3378 pnv_pci_ioda1_setup_dma_pe(phb, pe);
3379 break;
3380 case PNV_PHB_IODA2:
3381 pnv_pci_ioda2_setup_dma_pe(phb, pe);
3382 break;
3383 default:
3384 pr_warn("%s: No DMA for PHB#%x (type %d)\n",
3385 __func__, phb->hose->global_number, phb->type);
3386 }
3387}
3388
3389static resource_size_t pnv_pci_default_alignment(void)
3390{
3391 return PAGE_SIZE;
3392}
3393
3394#ifdef CONFIG_PCI_IOV
3395static resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev,
3396 int resno)
3397{
3398 struct pci_controller *hose = pci_bus_to_host(pdev->bus);
3399 struct pnv_phb *phb = hose->private_data;
3400 struct pci_dn *pdn = pci_get_pdn(pdev);
3401 resource_size_t align;
3402
3403 /*
3404  * On the PowerNV platform, an IOV BAR is mapped by an M64 BAR to
3405  * enable SR-IOV. From the hardware's perspective, the range mapped
3406  * by an M64 BAR must be size-aligned.
3407  *
3408  * When an IOV BAR is mapped with an M64 BAR in Single PE mode, that
3409  * extra hardware restriction is gone. But if we just used the VF BAR
3410  * size as the alignment, the PF BAR / VF BAR could be allocated
3411  * within one segment of M64 #15, which introduces a PE conflict
3412  * between PF and VF. Based on this, the minimum alignment of an
3413  * IOV BAR is m64_segsize.
3414  *
3415  * This function returns the total IOV BAR size if the M64 BAR is in
3416  * Shared PE mode, or just the VF BAR size if not.
3417  * If the M64 BAR is in Single PE mode, return the VF BAR size or
3418  * the M64 segment size if the IOV BAR size is less.
3419  */
3420 align = pci_iov_resource_size(pdev, resno);
3421 if (!pdn->vfs_expanded)
3422 return align;
3423 if (pdn->m64_single_mode)
3424 return max(align, (resource_size_t)phb->ioda.m64_segsize);
3425
3426 return pdn->vfs_expanded * align;
3427}
3428#endif
3429
3430/* Prevent enabling devices for which we couldn't properly
3431 * assign a PE
3432 */
3433bool pnv_pci_enable_device_hook(struct pci_dev *dev)
3434{
3435 struct pci_controller *hose = pci_bus_to_host(dev->bus);
3436 struct pnv_phb *phb = hose->private_data;
3437 struct pci_dn *pdn;
3438
3439 /* The function is probably called while the PEs have
3440  * not been created yet. For example, resource reassignment
3441  * during PCI probe period. We just skip the check if
3442  * PEs are not ready.
3443  */
3444 if (!phb->initialized)
3445 return true;
3446
3447 pdn = pci_get_pdn(dev);
3448 if (!pdn || pdn->pe_number == IODA_INVALID_PE) {
3449 pci_err(dev, "pci_enable_device() blocked, no PE assigned.\n");
3450 return false;
3451 }
3452
3453 return true;
3454}
3455
3456static bool pnv_ocapi_enable_device_hook(struct pci_dev *dev)
3457{
3458 struct pci_controller *hose = pci_bus_to_host(dev->bus);
3459 struct pnv_phb *phb = hose->private_data;
3460 struct pci_dn *pdn;
3461 struct pnv_ioda_pe *pe;
3462
3463 if (!phb->initialized)
3464 return true;
3465
3466 pdn = pci_get_pdn(dev);
3467 if (!pdn)
3468 return false;
3469
3470 if (pdn->pe_number == IODA_INVALID_PE) {
3471 pe = pnv_ioda_setup_dev_PE(dev);
3472 if (!pe)
3473 return false;
3474 }
3475 return true;
3476}
3477
3478static long pnv_pci_ioda1_unset_window(struct iommu_table_group *table_group,
3479 int num)
3480{
3481 struct pnv_ioda_pe *pe = container_of(table_group,
3482 struct pnv_ioda_pe, table_group);
3483 struct pnv_phb *phb = pe->phb;
3484 unsigned int idx;
3485 long rc;
3486
3487 pe_info(pe, "Removing DMA window #%d\n", num);
3488 for (idx = 0; idx < phb->ioda.dma32_count; idx++) {
3489 if (phb->ioda.dma32_segmap[idx] != pe->pe_number)
3490 continue;
3491
3492 rc = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number,
3493 idx, 0, 0ul, 0ul, 0ul);
3494 if (rc != OPAL_SUCCESS) {
3495 pe_warn(pe, "Failure %ld unmapping DMA32 segment#%d\n",
3496 rc, idx);
3497 return rc;
3498 }
3499
3500 phb->ioda.dma32_segmap[idx] = IODA_INVALID_PE;
3501 }
3502
3503 pnv_pci_unlink_table_and_group(table_group->tables[num], table_group);
3504 return OPAL_SUCCESS;
3505}
3506
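/* Tear down the 32-bit TCE table of an IODA1 PE that is being released */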
3507static void pnv_pci_ioda1_release_pe_dma(struct pnv_ioda_pe *pe)
3508{
3509 unsigned int weight = pnv_pci_ioda_pe_dma_weight(pe);
3510 struct iommu_table *tbl = pe->table_group.tables[0];
3511 int64_t rc;
3512
3513 if (!weight)
3514 return;
3515
3516 rc = pnv_pci_ioda1_unset_window(&pe->table_group, 0);
3517 if (rc != OPAL_SUCCESS)
3518 return;
3519
3520 pnv_pci_p7ioc_tce_invalidate(tbl, tbl->it_offset, tbl->it_size, false);
3521 if (pe->table_group.group) {
3522 iommu_group_put(pe->table_group.group);
3523 WARN_ON(pe->table_group.group);
3524 }
3525
3526 free_pages(tbl->it_base, get_order(tbl->it_size << 3));
3527 iommu_tce_table_put(tbl);
3528}
3529
3530static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe)
3531{
3532 struct iommu_table *tbl = pe->table_group.tables[0];
3533 unsigned int weight = pnv_pci_ioda_pe_dma_weight(pe);
3534#ifdef CONFIG_IOMMU_API
3535 int64_t rc;
3536#endif
3537
3538 if (!weight)
3539 return;
3540
3541#ifdef CONFIG_IOMMU_API
3542 rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0);
3543 if (rc)
3544 pe_warn(pe, "OPAL error %ld release DMA window\n", rc);
3545#endif
3546
3547 pnv_pci_ioda2_set_bypass(pe, false);
3548 if (pe->table_group.group) {
3549 iommu_group_put(pe->table_group.group);
3550 WARN_ON(pe->table_group.group);
3551 }
3552
3553 iommu_tce_table_put(tbl);
3554}
3555
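/*
 * Hand every segment in @map owned by the PE back to the reserved PE.
 * IODA1 M64 windows are addressed in groups of PNV_IODA1_M64_SEGS.
 */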
3556static void pnv_ioda_free_pe_seg(struct pnv_ioda_pe *pe,
3557 unsigned short win,
3558 unsigned int *map)
3559{
3560 struct pnv_phb *phb = pe->phb;
3561 int idx;
3562 int64_t rc;
3563
3564 for (idx = 0; idx < phb->ioda.total_pe_num; idx++) {
3565 if (map[idx] != pe->pe_number)
3566 continue;
3567
3568 if (win == OPAL_M64_WINDOW_TYPE)
3569 rc = opal_pci_map_pe_mmio_window(phb->opal_id,
3570 phb->ioda.reserved_pe_idx, win,
3571 idx / PNV_IODA1_M64_SEGS,
3572 idx % PNV_IODA1_M64_SEGS);
3573 else
3574 rc = opal_pci_map_pe_mmio_window(phb->opal_id,
3575 phb->ioda.reserved_pe_idx, win, 0, idx);
3576
3577 if (rc != OPAL_SUCCESS)
3578 pe_warn(pe, "Error %ld unmapping (%d) segment#%d\n",
3579 rc, win, idx);
3580
3581 map[idx] = IODA_INVALID_PE;
3582 }
3583}
3584
3585static void pnv_ioda_release_pe_seg(struct pnv_ioda_pe *pe)
3586{
3587 struct pnv_phb *phb = pe->phb;
3588
3589 if (phb->type == PNV_PHB_IODA1) {
3590 pnv_ioda_free_pe_seg(pe, OPAL_IO_WINDOW_TYPE,
3591 phb->ioda.io_segmap);
3592 pnv_ioda_free_pe_seg(pe, OPAL_M32_WINDOW_TYPE,
3593 phb->ioda.m32_segmap);
3594 pnv_ioda_free_pe_seg(pe, OPAL_M64_WINDOW_TYPE,
3595 phb->ioda.m64_segmap);
3596 } else if (phb->type == PNV_PHB_IODA2) {
3597 pnv_ioda_free_pe_seg(pe, OPAL_M32_WINDOW_TYPE,
3598 phb->ioda.m32_segmap);
3599 }
3600}
3601
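/* Release the DMA setup, window segments and PE number of an idle PE */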
3602static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe)
3603{
3604 struct pnv_phb *phb = pe->phb;
3605 struct pnv_ioda_pe *slave, *tmp;
3606
3607 mutex_lock(&phb->ioda.pe_list_mutex);
3608 list_del(&pe->list);
3609 mutex_unlock(&phb->ioda.pe_list_mutex);
3610
3611 switch (phb->type) {
3612 case PNV_PHB_IODA1:
3613 pnv_pci_ioda1_release_pe_dma(pe);
3614 break;
3615 case PNV_PHB_IODA2:
3616 pnv_pci_ioda2_release_pe_dma(pe);
3617 break;
3618 case PNV_PHB_NPU_OCAPI:
3619 break;
3620 default:
3621 WARN_ON(1);
3622 }
3623
3624 pnv_ioda_release_pe_seg(pe);
3625 pnv_ioda_deconfigure_pe(pe->phb, pe);
3626
3627 /* Release slave PEs in the compound PE */
3628 if (pe->flags & PNV_IODA_PE_MASTER) {
3629 list_for_each_entry_safe(slave, tmp, &pe->slaves, list) {
3630 list_del(&slave->list);
3631 pnv_ioda_free_pe(slave);
3632 }
3633 }
3634
3635 /*
3636  * The PE for the root bus can be removed because of hotplug in EEH
3637  * recovery for a fenced PHB error. We need to mark the PE dead so
3638  * that it can be populated again in the PCI hot add path. The PE
3639  * shouldn't be destroyed as it's the global reserved resource.
3640  */
3641 if (phb->ioda.root_pe_populated &&
3642 phb->ioda.root_pe_idx == pe->pe_number)
3643 phb->ioda.root_pe_populated = false;
3644 else
3645 pnv_ioda_free_pe(pe);
3646}
3647
3648static void pnv_pci_release_device(struct pci_dev *pdev)
3649{
3650 struct pci_controller *hose = pci_bus_to_host(pdev->bus);
3651 struct pnv_phb *phb = hose->private_data;
3652 struct pci_dn *pdn = pci_get_pdn(pdev);
3653 struct pnv_ioda_pe *pe;
3654
3655 if (pdev->is_virtfn)
3656 return;
3657
3658 if (!pdn || pdn->pe_number == IODA_INVALID_PE)
3659 return;
3660
3661 /*
3662  * PCI hotplug can happen as part of EEH error recovery. The @pdn
3663  * isn't removed and re-added afterwards in this scenario. We should
3664  * set the PE number in @pdn to an invalid one. Otherwise, the PE's
3665  * device count is decreased on removing devices while failing to
3666  * be increased on adding devices. That leads to an unbalanced PE
3667  * device count and eventually breaks the normal PCI hotplug path.
3668  */
3669 pe = &phb->ioda.pe_array[pdn->pe_number];
3670 pdn->pe_number = IODA_INVALID_PE;
3671
3672 WARN_ON(--pe->device_count < 0);
3673 if (pe->device_count == 0)
3674 pnv_ioda_release_pe(pe);
3675}
3676
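/* Quiesce an NPU device on disable by issuing a hot reset through EEH */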
3677static void pnv_npu_disable_device(struct pci_dev *pdev)
3678{
3679 struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
3680 struct eeh_pe *eehpe = edev ? edev->pe : NULL;
3681
3682 if (eehpe && eeh_ops && eeh_ops->reset)
3683 eeh_ops->reset(eehpe, EEH_RESET_HOT);
3684}
3685
3686static void pnv_pci_ioda_shutdown(struct pci_controller *hose)
3687{
3688 struct pnv_phb *phb = hose->private_data;
3689
3690 opal_pci_reset(phb->opal_id, OPAL_RESET_PCI_IODA_TABLE,
3691 OPAL_ASSERT_RESET);
3692}
3693
3694static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
3695 .dma_dev_setup = pnv_pci_dma_dev_setup,
3696 .dma_bus_setup = pnv_pci_dma_bus_setup,
3697 .iommu_bypass_supported = pnv_pci_ioda_iommu_bypass_supported,
3698 .setup_msi_irqs = pnv_setup_msi_irqs,
3699 .teardown_msi_irqs = pnv_teardown_msi_irqs,
3700 .enable_device_hook = pnv_pci_enable_device_hook,
3701 .release_device = pnv_pci_release_device,
3702 .window_alignment = pnv_pci_window_alignment,
3703 .setup_bridge = pnv_pci_setup_bridge,
3704 .reset_secondary_bus = pnv_pci_reset_secondary_bus,
3705 .shutdown = pnv_pci_ioda_shutdown,
3706};
3707
3708static const struct pci_controller_ops pnv_npu_ioda_controller_ops = {
3709 .dma_dev_setup = pnv_pci_dma_dev_setup,
3710 .setup_msi_irqs = pnv_setup_msi_irqs,
3711 .teardown_msi_irqs = pnv_teardown_msi_irqs,
3712 .enable_device_hook = pnv_pci_enable_device_hook,
3713 .window_alignment = pnv_pci_window_alignment,
3714 .reset_secondary_bus = pnv_pci_reset_secondary_bus,
3715 .shutdown = pnv_pci_ioda_shutdown,
3716 .disable_device = pnv_npu_disable_device,
3717};
3718
3719static const struct pci_controller_ops pnv_npu_ocapi_ioda_controller_ops = {
3720 .enable_device_hook = pnv_ocapi_enable_device_hook,
3721 .release_device = pnv_pci_release_device,
3722 .window_alignment = pnv_pci_window_alignment,
3723 .reset_secondary_bus = pnv_pci_reset_secondary_bus,
3724 .shutdown = pnv_pci_ioda_shutdown,
3725};
3726
3727#ifdef CONFIG_CXL_BASE
3728const struct pci_controller_ops pnv_cxl_cx4_ioda_controller_ops = {
3729 .dma_dev_setup = pnv_pci_dma_dev_setup,
3730 .dma_bus_setup = pnv_pci_dma_bus_setup,
3731 .iommu_bypass_supported = pnv_pci_ioda_iommu_bypass_supported,
3732#ifdef CONFIG_PCI_MSI
3733 .setup_msi_irqs = pnv_cxl_cx4_setup_msi_irqs,
3734 .teardown_msi_irqs = pnv_cxl_cx4_teardown_msi_irqs,
3735#endif
3736 .enable_device_hook = pnv_cxl_enable_device_hook,
3737 .disable_device = pnv_cxl_disable_device,
3738 .release_device = pnv_pci_release_device,
3739 .window_alignment = pnv_pci_window_alignment,
3740 .setup_bridge = pnv_pci_setup_bridge,
3741 .reset_secondary_bus = pnv_pci_reset_secondary_bus,
3742 .shutdown = pnv_pci_ioda_shutdown,
3743};
3744#endif
3745
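/*
 * Probe one IODA PHB from the device tree: allocate the pnv_phb and
 * PCI controller, size the PE arrays and segment maps, then set up
 * MSIs, controller ops and reset the IODA tables.
 */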
3746static void __init pnv_pci_init_ioda_phb(struct device_node *np,
3747 u64 hub_id, int ioda_type)
3748{
3749 struct pci_controller *hose;
3750 struct pnv_phb *phb;
3751 unsigned long size, m64map_off, m32map_off, pemap_off;
3752 unsigned long iomap_off = 0, dma32map_off = 0;
3753 struct resource r;
3754 const __be64 *prop64;
3755 const __be32 *prop32;
3756 int len;
3757 unsigned int segno;
3758 u64 phb_id;
3759 void *aux;
3760 long rc;
3761
3762 if (!of_device_is_available(np))
3763 return;
3764
3765 pr_info("Initializing %s PHB (%pOF)\n", pnv_phb_names[ioda_type], np);
3766
3767 prop64 = of_get_property(np, "ibm,opal-phbid", NULL);
3768 if (!prop64) {
3769 pr_err(" Missing \"ibm,opal-phbid\" property !\n");
3770 return;
3771 }
3772 phb_id = be64_to_cpup(prop64);
3773 pr_debug(" PHB-ID : 0x%016llx\n", phb_id);
3774
3775 phb = kzalloc(sizeof(*phb), GFP_KERNEL);
3776 if (!phb)
3777 panic("%s: Failed to allocate %zu bytes\n", __func__,
3778 sizeof(*phb));
3779
3780 /* Allocate PCI controller */
3781 phb->hose = hose = pcibios_alloc_controller(np);
3782 if (!phb->hose) {
3783 pr_err(" Can't allocate PCI controller for %pOF\n",
3784 np);
3785 kfree(phb);
3786 return;
3787 }
3788
3789 spin_lock_init(&phb->lock);
3790 prop32 = of_get_property(np, "bus-range", &len);
3791 if (prop32 && len == 8) {
3792 hose->first_busno = be32_to_cpu(prop32[0]);
3793 hose->last_busno = be32_to_cpu(prop32[1]);
3794 } else {
3795 pr_warn(" Broken <bus-range> on %pOF\n", np);
3796 hose->first_busno = 0;
3797 hose->last_busno = 0xff;
3798 }
3799 hose->private_data = phb;
3800 phb->hub_id = hub_id;
3801 phb->opal_id = phb_id;
3802 phb->type = ioda_type;
3803 mutex_init(&phb->ioda.pe_alloc_mutex);
3804
3805 /* Detect specific models for error handling */
3806 if (of_device_is_compatible(np, "ibm,p7ioc-pciex"))
3807 phb->model = PNV_PHB_MODEL_P7IOC;
3808 else if (of_device_is_compatible(np, "ibm,power8-pciex"))
3809 phb->model = PNV_PHB_MODEL_PHB3;
3810 else if (of_device_is_compatible(np, "ibm,power8-npu-pciex"))
3811 phb->model = PNV_PHB_MODEL_NPU;
3812 else if (of_device_is_compatible(np, "ibm,power9-npu-pciex"))
3813 phb->model = PNV_PHB_MODEL_NPU2;
3814 else
3815 phb->model = PNV_PHB_MODEL_UNKNOWN;
3816
3817 /* Initialize diagnostic data buffer */
3818 prop32 = of_get_property(np, "ibm,phb-diag-data-size", NULL);
3819 if (prop32)
3820 phb->diag_data_size = be32_to_cpup(prop32);
3821 else
3822 phb->diag_data_size = PNV_PCI_DIAG_BUF_SIZE;
3823
3824 phb->diag_data = kzalloc(phb->diag_data_size, GFP_KERNEL);
3825 if (!phb->diag_data)
3826 panic("%s: Failed to allocate %u bytes\n", __func__,
3827 phb->diag_data_size);
3828
3829 /* Parse 32-bit and IO ranges (if any) */
3830 pci_process_bridge_OF_ranges(hose, np, !hose->global_number);
3831
3832 /* Get registers */
3833 if (!of_address_to_resource(np, 0, &r)) {
3834 phb->regs_phys = r.start;
3835 phb->regs = ioremap(r.start, resource_size(&r));
3836 if (phb->regs == NULL)
3837 pr_err(" Failed to map registers !\n");
3838 }
3839
3840 /* Initialize more IODA stuff */
3841 phb->ioda.total_pe_num = 1;
3842 prop32 = of_get_property(np, "ibm,opal-num-pes", NULL);
3843 if (prop32)
3844 phb->ioda.total_pe_num = be32_to_cpup(prop32);
3845 prop32 = of_get_property(np, "ibm,opal-reserved-pe", NULL);
3846 if (prop32)
3847 phb->ioda.reserved_pe_idx = be32_to_cpup(prop32);
3848
3849 /* Invalidate RID to PE# mapping */
3850 for (segno = 0; segno < ARRAY_SIZE(phb->ioda.pe_rmap); segno++)
3851 phb->ioda.pe_rmap[segno] = IODA_INVALID_PE;
3852
3853 /* Parse 64-bit MMIO range */
3854 pnv_ioda_parse_m64_window(phb);
3855
3856 phb->ioda.m32_size = resource_size(&hose->mem_resources[0]);
3857 /* FW has already carved the top 64K of M32 space out (MSI space) */
3858 phb->ioda.m32_size += 0x10000;
3859
3860 phb->ioda.m32_segsize = phb->ioda.m32_size / phb->ioda.total_pe_num;
3861 phb->ioda.m32_pci_base = hose->mem_resources[0].start - hose->mem_offset[0];
3862 phb->ioda.io_size = hose->pci_io_size;
3863 phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe_num;
3864 phb->ioda.io_pci_base = 0;
3865
3866 /* Calculate how many 32-bit TCE segments we have */
3867 phb->ioda.dma32_count = phb->ioda.m32_pci_base /
3868 PNV_IODA1_DMA32_SEGSIZE;
3869
3870 /* Allocate aux data & arrays. We don't have IO ports on PHB3 */
3871 size = ALIGN(max_t(unsigned, phb->ioda.total_pe_num, 8) / 8,
3872 sizeof(unsigned long));
3873 m64map_off = size;
3874 size += phb->ioda.total_pe_num * sizeof(phb->ioda.m64_segmap[0]);
3875 m32map_off = size;
3876 size += phb->ioda.total_pe_num * sizeof(phb->ioda.m32_segmap[0]);
3877 if (phb->type == PNV_PHB_IODA1) {
3878 iomap_off = size;
3879 size += phb->ioda.total_pe_num * sizeof(phb->ioda.io_segmap[0]);
3880 dma32map_off = size;
3881 size += phb->ioda.dma32_count *
3882 sizeof(phb->ioda.dma32_segmap[0]);
3883 }
3884 pemap_off = size;
3885 size += phb->ioda.total_pe_num * sizeof(struct pnv_ioda_pe);
3886 aux = kzalloc(size, GFP_KERNEL);
3887 if (!aux)
3888 panic("%s: Failed to allocate %lu bytes\n", __func__, size);
3889
3890 phb->ioda.pe_alloc = aux;
3891 phb->ioda.m64_segmap = aux + m64map_off;
3892 phb->ioda.m32_segmap = aux + m32map_off;
3893 for (segno = 0; segno < phb->ioda.total_pe_num; segno++) {
3894 phb->ioda.m64_segmap[segno] = IODA_INVALID_PE;
3895 phb->ioda.m32_segmap[segno] = IODA_INVALID_PE;
3896 }
3897 if (phb->type == PNV_PHB_IODA1) {
3898 phb->ioda.io_segmap = aux + iomap_off;
3899 for (segno = 0; segno < phb->ioda.total_pe_num; segno++)
3900 phb->ioda.io_segmap[segno] = IODA_INVALID_PE;
3901
3902 phb->ioda.dma32_segmap = aux + dma32map_off;
3903 for (segno = 0; segno < phb->ioda.dma32_count; segno++)
3904 phb->ioda.dma32_segmap[segno] = IODA_INVALID_PE;
3905 }
3906 phb->ioda.pe_array = aux + pemap_off;
3907
3908 /*
3909  * Choose the PE number for the root bus, which shouldn't have
3910  * M64 resources consumed by its child devices. We pick the PE
3911  * number adjacent to the reserved one if possible.
3912  */
3913 pnv_ioda_reserve_pe(phb, phb->ioda.reserved_pe_idx);
3914 if (phb->ioda.reserved_pe_idx == 0) {
3915 phb->ioda.root_pe_idx = 1;
3916 pnv_ioda_reserve_pe(phb, phb->ioda.root_pe_idx);
3917 } else if (phb->ioda.reserved_pe_idx == (phb->ioda.total_pe_num - 1)) {
3918 phb->ioda.root_pe_idx = phb->ioda.reserved_pe_idx - 1;
3919 pnv_ioda_reserve_pe(phb, phb->ioda.root_pe_idx);
3920 } else {
3921 phb->ioda.root_pe_idx = IODA_INVALID_PE;
3922 }
3923
3924 INIT_LIST_HEAD(&phb->ioda.pe_list);
3925 mutex_init(&phb->ioda.pe_list_mutex);
3926
3930
3931#if 0 /* We should really do that ... */
3932 rc = opal_pci_set_phb_mem_window(opal->phb_id,
3933 window_type,
3934 window_num,
3935 starting_real_address,
3936 starting_pci_address,
3937 segment_size);
3938#endif
3939
3940 pr_info(" %03d (%03d) PE's M32: 0x%x [segment=0x%x]\n",
3941 phb->ioda.total_pe_num, phb->ioda.reserved_pe_idx,
3942 phb->ioda.m32_size, phb->ioda.m32_segsize);
3943 if (phb->ioda.m64_size)
3944 pr_info(" M64: 0x%lx [segment=0x%lx]\n",
3945 phb->ioda.m64_size, phb->ioda.m64_segsize);
3946 if (phb->ioda.io_size)
3947 pr_info(" IO: 0x%x [segment=0x%x]\n",
3948 phb->ioda.io_size, phb->ioda.io_segsize);
3949
3950
3951 phb->hose->ops = &pnv_pci_ops;
3952 phb->get_pe_state = pnv_ioda_get_pe_state;
3953 phb->freeze_pe = pnv_ioda_freeze_pe;
3954 phb->unfreeze_pe = pnv_ioda_unfreeze_pe;
3955
3956 /* Setup MSI support */
3957 pnv_pci_init_ioda_msis(phb);
3958
3959 /*
3960  * We pass the PCI probe flag PCI_REASSIGN_ALL_RSRC here
3961  * to let the PCI core do resource assignment. It's expected
3962  * that the PCI core will do correct I/O and MMIO alignment
3963  * for the P2P bridge bars so that each PCI bus (excluding
3964  * the child P2P bridges) can be covered by one IODA PE.
3965  */
3966 ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
3967
3968 switch (phb->type) {
3969 case PNV_PHB_NPU_NVLINK:
3970 hose->controller_ops = pnv_npu_ioda_controller_ops;
3971 break;
3972 case PNV_PHB_NPU_OCAPI:
3973 hose->controller_ops = pnv_npu_ocapi_ioda_controller_ops;
3974 break;
3975 default:
3976 phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;
3977 hose->controller_ops = pnv_pci_ioda_controller_ops;
3978 }
3979
3980 ppc_md.pcibios_default_alignment = pnv_pci_default_alignment;
3981
3982#ifdef CONFIG_PCI_IOV
3983 ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_iov_resources;
3984 ppc_md.pcibios_iov_resource_alignment = pnv_pci_iov_resource_alignment;
3985 ppc_md.pcibios_sriov_enable = pnv_pcibios_sriov_enable;
3986 ppc_md.pcibios_sriov_disable = pnv_pcibios_sriov_disable;
3987#endif
3988
3989 pci_add_flags(PCI_REASSIGN_ALL_RSRC);
3990
3991 /* Reset IODA tables to a clean state */
3992 rc = opal_pci_reset(phb_id, OPAL_RESET_PCI_IODA_TABLE, OPAL_ASSERT_RESET);
3993 if (rc)
3994 pr_warn(" OPAL Error %ld performing IODA table reset !\n", rc);
3995
3996 /*
3997  * If we're running in a kdump kernel, the previous kernel never
3998  * shut down PCI devices correctly. We already got the IODA table
3999  * cleaned out above, so we have to issue a PHB reset to stop all
4000  * PCI transactions from the previous kernel. The ppc_pci_reset_phbs
4001  * kernel parameter will force this reset too. Additionally,
4002  * if the IODA reset above failed then use a bigger hammer.
4003  * This can happen if we get a PHB fatal error in very early
4004  * boot.
4005  */
4006 if (is_kdump_kernel() || pci_reset_phbs || rc) {
4007 pr_info(" Issue PHB reset ...\n");
4008 pnv_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL);
4009 pnv_eeh_phb_reset(hose, EEH_RESET_DEACTIVATE);
4010 }
4011
4012 /* Remove M64 resource if we can't configure it successfully */
4013 if (!phb->init_m64 || phb->init_m64(phb))
4014 hose->mem_resources[1].flags = 0;
4015
4016 /* Create pci_dn's for DT nodes under this PHB */
4017 pci_devs_phb_init_dynamic(hose);
4018}
4019
4020void __init pnv_pci_init_ioda2_phb(struct device_node *np)
4021{
4022 pnv_pci_init_ioda_phb(np, 0, PNV_PHB_IODA2);
4023}
4024
4025void __init pnv_pci_init_npu_phb(struct device_node *np)
4026{
4027 pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU_NVLINK);
4028}
4029
4030void __init pnv_pci_init_npu2_opencapi_phb(struct device_node *np)
4031{
4032 pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU_OCAPI);
4033}
4034
4035static void pnv_npu2_opencapi_cfg_size_fixup(struct pci_dev *dev)
4036{
4037 struct pci_controller *hose = pci_bus_to_host(dev->bus);
4038 struct pnv_phb *phb = hose->private_data;
4039
4040 if (!machine_is(powernv))
4041 return;
4042
4043 if (phb->type == PNV_PHB_NPU_OCAPI)
4044 dev->cfg_size = PCI_CFG_SPACE_EXP_SIZE;
4045}
4046DECLARE_PCI_FIXUP_EARLY(PCI_ANY_ID, PCI_ANY_ID, pnv_npu2_opencapi_cfg_size_fixup);
4047
4048void __init pnv_pci_init_ioda_hub(struct device_node *np)
4049{
4050 struct device_node *phbn;
4051 const __be64 *prop64;
4052 u64 hub_id;
4053
4054 pr_info("Probing IODA IO-Hub %pOF\n", np);
4055
4056 prop64 = of_get_property(np, "ibm,opal-hubid", NULL);
4057 if (!prop64) {
4058 pr_err(" Missing \"ibm,opal-hubid\" property !\n");
4059 return;
4060 }
4061 hub_id = be64_to_cpup(prop64);
4062 pr_devel(" HUB-ID : 0x%016llx\n", hub_id);
4063
4064 /* Count child PHBs */
4065 for_each_child_of_node(np, phbn) {
4066 /* Look for IODA1 PHBs */
4067 if (of_device_is_compatible(phbn, "ibm,ioda-phb"))
4068 pnv_pci_init_ioda_phb(phbn, hub_id, PNV_PHB_IODA1);
4069 }
4070}
4071