/*
 *	SGI UltraViolet TLB flush routines.
 *
 *	(c) 2008-2014 Cliff Wickman <cpw@sgi.com>, SGI.
 *
 *	This code is released under the GNU General Public License version 2 or
 *	later.
 */
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <linux/debugfs.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/delay.h>

#include <asm/mmu_context.h>
#include <asm/uv/uv.h>
#include <asm/uv/uv_mmrs.h>
#include <asm/uv/uv_hub.h>
#include <asm/uv/uv_bau.h>
#include <asm/apic.h>
#include <asm/idle.h>
#include <asm/tsc.h>
#include <asm/irq_vectors.h>
#include <asm/timer.h>

static struct bau_operations ops __read_mostly;

/* timeouts in nanoseconds (indexed by UVH_AGING_PRESCALE_SEL urgency7 30:28) */
static int timeout_base_ns[] = {
		20,
		160,
		1280,
		10240,
		81920,
		655360,
		5242880,
		167772160
};

static int timeout_us;
static bool nobau = true;
static int nobau_perm;
static cycles_t congested_cycles;

/* tunables: */
static int max_concurr = MAX_BAU_CONCURRENT;
static int max_concurr_const = MAX_BAU_CONCURRENT;
static int plugged_delay = PLUGGED_DELAY;
static int plugsb4reset = PLUGSB4RESET;
static int giveup_limit = GIVEUP_LIMIT;
static int timeoutsb4reset = TIMEOUTSB4RESET;
static int ipi_reset_limit = IPI_RESET_LIMIT;
static int complete_threshold = COMPLETE_THRESHOLD;
static int congested_respns_us = CONGESTED_RESPONSE_US;
static int congested_reps = CONGESTED_REPS;
static int disabled_period = DISABLED_PERIOD;

static struct tunables tunables[] = {
	{&max_concurr, MAX_BAU_CONCURRENT},
	{&plugged_delay, PLUGGED_DELAY},
	{&plugsb4reset, PLUGSB4RESET},
	{&timeoutsb4reset, TIMEOUTSB4RESET},
	{&ipi_reset_limit, IPI_RESET_LIMIT},
	{&complete_threshold, COMPLETE_THRESHOLD},
	{&congested_respns_us, CONGESTED_RESPONSE_US},
	{&congested_reps, CONGESTED_REPS},
	{&disabled_period, DISABLED_PERIOD},
	{&giveup_limit, GIVEUP_LIMIT}
};

static struct dentry *tunables_dir;
static struct dentry *tunables_file;

75
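/*
 * One-line descriptions of the ptc_statistics counters; printed by
 * ptc_proc_write() when a '0' is written to the statistics proc file.
 */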
76static char *stat_description[] = {
77 "sent: number of shootdown messages sent",
78 "stime: time spent sending messages",
79 "numuvhubs: number of hubs targeted with shootdown",
80 "numuvhubs16: number times 16 or more hubs targeted",
81 "numuvhubs8: number times 8 or more hubs targeted",
82 "numuvhubs4: number times 4 or more hubs targeted",
83 "numuvhubs2: number times 2 or more hubs targeted",
84 "numuvhubs1: number times 1 hub targeted",
85 "numcpus: number of cpus targeted with shootdown",
86 "dto: number of destination timeouts",
87 "retries: destination timeout retries sent",
88 "rok: : destination timeouts successfully retried",
89 "resetp: ipi-style resource resets for plugs",
90 "resett: ipi-style resource resets for timeouts",
91 "giveup: fall-backs to ipi-style shootdowns",
92 "sto: number of source timeouts",
93 "bz: number of stay-busy's",
94 "throt: number times spun in throttle",
95 "swack: image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE",
96 "recv: shootdown messages received",
97 "rtime: time spent processing messages",
98 "all: shootdown all-tlb messages",
99 "one: shootdown one-tlb messages",
100 "mult: interrupts that found multiple messages",
101 "none: interrupts that found no messages",
102 "retry: number of retry messages processed",
103 "canc: number messages canceled by retries",
104 "nocan: number retries that found nothing to cancel",
105 "reset: number of ipi-style reset requests processed",
106 "rcan: number messages canceled by reset requests",
107 "disable: number times use of the BAU was disabled",
108 "enable: number times use of the BAU was re-enabled"
109};

static int __init setup_bau(char *arg)
{
	int result;

	if (!arg)
		return -EINVAL;

	result = strtobool(arg, &nobau);
	if (result)
		return result;

	/* we need to flip the logic here, so that bau == on means nobau == false */
	nobau = !nobau;

	if (!nobau)
		pr_info("UV BAU Enabled\n");
	else
		pr_info("UV BAU Disabled\n");

	return 0;
}
early_param("bau", setup_bau);

/* base pnode in this partition */
static int uv_base_pnode __read_mostly;
136
137static DEFINE_PER_CPU(struct ptc_stats, ptcstats);
138static DEFINE_PER_CPU(struct bau_control, bau_control);
139static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask);
140
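/*
 * Turn use of the BAU back on for all cpus at runtime (the "on" command
 * handled by ptc_proc_write()).  Refused if the BAU was never initialized.
 */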
141static void
142set_bau_on(void)
143{
144 int cpu;
145 struct bau_control *bcp;
146
147 if (nobau_perm) {
148 pr_info("BAU not initialized; cannot be turned on\n");
149 return;
150 }
151 nobau = false;
152 for_each_present_cpu(cpu) {
153 bcp = &per_cpu(bau_control, cpu);
154 bcp->nobau = false;
155 }
156 pr_info("BAU turned on\n");
157 return;
158}
159
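/*
 * Turn use of the BAU off for all cpus at runtime (the "off" command
 * handled by ptc_proc_write()).
 */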
160static void
161set_bau_off(void)
162{
163 int cpu;
164 struct bau_control *bcp;
165
166 nobau = true;
167 for_each_present_cpu(cpu) {
168 bcp = &per_cpu(bau_control, cpu);
169 bcp->nobau = true;
170 }
171 pr_info("BAU turned off\n");
172 return;
173}
174
/*
 * Determine the first node on a uvhub. 'Nodes' are used for kernel
 * memory allocation.
 */
static int __init uvhub_to_first_node(int uvhub)
{
	int node, b;

	for_each_online_node(node) {
		b = uv_node_to_blade_id(node);
		if (uvhub == b)
			return node;
	}
	return -1;
}
190
/*
 * Determine the apicid of the first cpu on a uvhub.
 */
static int __init uvhub_to_first_apicid(int uvhub)
{
	int cpu;

	for_each_present_cpu(cpu)
		if (uvhub == uv_cpu_to_blade_id(cpu))
			return per_cpu(x86_cpu_to_apicid, cpu);
	return -1;
}
203
/*
 * Free a software acknowledge hardware resource by writing the message's
 * swack vector (and its timeout companion bits) back to the software-
 * acknowledge MMR, then mark the queue entry replied-to so it can be
 * reused.  Called once per message, by the last cpu on the uvhub to see
 * it (see bau_process_message()).
 */
static void reply_to_message(struct msg_desc *mdp, struct bau_control *bcp,
						int do_acknowledge)
{
	unsigned long dw;
	struct bau_pq_entry *msg;

	msg = mdp->msg;
	if (!msg->canceled && do_acknowledge) {
		dw = (msg->swack_vec << UV_SW_ACK_NPENDING) | msg->swack_vec;
		ops.write_l_sw_ack(dw);
	}
	msg->replied_to = 1;
	msg->swack_vec = 0;
}
226
/*
 * Process the receipt of a RETRY message
 */
230static void bau_process_retry_msg(struct msg_desc *mdp,
231 struct bau_control *bcp)
232{
233 int i;
234 int cancel_count = 0;
235 unsigned long msg_res;
236 unsigned long mmr = 0;
237 struct bau_pq_entry *msg = mdp->msg;
238 struct bau_pq_entry *msg2;
239 struct ptc_stats *stat = bcp->statp;
240
241 stat->d_retries++;
	/*
	 * cancel any message from msg+1 to the retry itself
	 */
245 for (msg2 = msg+1, i = 0; i < DEST_Q_SIZE; msg2++, i++) {
246 if (msg2 > mdp->queue_last)
247 msg2 = mdp->queue_first;
248 if (msg2 == msg)
249 break;

		/* same conditions for cancellation as in do_reset() */
252 if ((msg2->replied_to == 0) && (msg2->canceled == 0) &&
253 (msg2->swack_vec) && ((msg2->swack_vec &
254 msg->swack_vec) == 0) &&
255 (msg2->sending_cpu == msg->sending_cpu) &&
256 (msg2->msg_type != MSG_NOOP)) {
257 mmr = ops.read_l_sw_ack();
258 msg_res = msg2->swack_vec;
			/*
			 * This is a message retry; clear the resources held
			 * by the previous message only if they timed out.
			 * If the resource has not timed out it is still in
			 * use and must be left alone.
			 */
265 if (mmr & (msg_res << UV_SW_ACK_NPENDING)) {
266 unsigned long mr;
				/*
				 * The resource timed out: make everyone
				 * ignore the cancelled message.
				 */
271 msg2->canceled = 1;
272 stat->d_canceled++;
273 cancel_count++;
274 mr = (msg_res << UV_SW_ACK_NPENDING) | msg_res;
275 ops.write_l_sw_ack(mr);
276 }
277 }
278 }
279 if (!cancel_count)
280 stat->d_nocanceled++;
281}
282
/*
 * Do all the things a cpu should do for a TLB shootdown message.
 * Other cpu's may come here at the same time for this message.
 */
287static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp,
288 int do_acknowledge)
289{
290 short socket_ack_count = 0;
291 short *sp;
292 struct atomic_short *asp;
293 struct ptc_stats *stat = bcp->statp;
294 struct bau_pq_entry *msg = mdp->msg;
295 struct bau_control *smaster = bcp->socket_master;

	/*
	 * Flush one page or the whole TLB, as selected by the payload address.
	 */
300 if (msg->address == TLB_FLUSH_ALL) {
301 local_flush_tlb();
302 stat->d_alltlb++;
303 } else {
304 __flush_tlb_one(msg->address);
305 stat->d_onetlb++;
306 }
307 stat->d_requestee++;

	/*
	 * One cpu on each uvhub has the additional job on a RETRY
	 * of releasing the resource held by the message that is
	 * being retried.  That message is identified by sending
	 * cpu.
	 */
315 if (msg->msg_type == MSG_RETRY && bcp == bcp->uvhub_master)
316 bau_process_retry_msg(mdp, bcp);

	/*
	 * This is a swack message, so we have to reply to it.
	 * Count each responding cpu on the socket. This avoids
	 * pinging the count's cache line back and forth between
	 * the sockets.
	 */
324 sp = &smaster->socket_acknowledge_count[mdp->msg_slot];
325 asp = (struct atomic_short *)sp;
326 socket_ack_count = atom_asr(1, asp);
327 if (socket_ack_count == bcp->cpus_in_socket) {
328 int msg_ack_count;
		/*
		 * Both sockets dump their completed count total into
		 * the message's count.
		 */
333 *sp = 0;
334 asp = (struct atomic_short *)&msg->acknowledge_count;
335 msg_ack_count = atom_asr(socket_ack_count, asp);
336
337 if (msg_ack_count == bcp->cpus_in_uvhub) {
			/*
			 * All cpus in the uvhub have seen the message;
			 * the last one to respond sends the reply.
			 */
342 reply_to_message(mdp, bcp, do_acknowledge);
343 }
344 }
345
346 return;
347}
348
349
350
351
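/*
 * Determine the first cpu on a pnode, using the socket master's
 * per-cpu hub_and_pnode table.
 */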
352static int pnode_to_first_cpu(int pnode, struct bau_control *smaster)
353{
354 int cpu;
355 struct hub_and_pnode *hpp;
356
357 for_each_present_cpu(cpu) {
358 hpp = &smaster->thp[cpu];
359 if (pnode == hpp->pnode)
360 return cpu;
361 }
362 return -1;
363}
364
/*
 * Last resort when too many destination timeouts or plugs occur: clear the
 * resources held at the destinations.  do_reset() runs (via IPI from
 * reset_with_ipi()) on one cpu of each destination pnode; it scans that
 * node's payload queue for uncompleted messages from the given sender and
 * cancels them, releasing any software-ack resource still pending.
 */
375static void do_reset(void *ptr)
376{
377 int i;
378 struct bau_control *bcp = &per_cpu(bau_control, smp_processor_id());
379 struct reset_args *rap = (struct reset_args *)ptr;
380 struct bau_pq_entry *msg;
381 struct ptc_stats *stat = bcp->statp;
382
383 stat->d_resets++;

	/*
	 * We're looking for messages from the given sender, and
	 * will free their swack resources.
	 */
390 for (msg = bcp->queue_first, i = 0; i < DEST_Q_SIZE; msg++, i++) {
391 unsigned long msg_res;

		/* an uncompleted, uncanceled message from this sender that still holds a resource? */
394 if ((msg->replied_to == 0) &&
395 (msg->canceled == 0) &&
396 (msg->sending_cpu == rap->sender) &&
397 (msg->swack_vec) &&
398 (msg->msg_type != MSG_NOOP)) {
399 unsigned long mmr;
400 unsigned long mr;
			/*
			 * make everyone else ignore this message
			 */
404 msg->canceled = 1;
			/*
			 * only reset the resource if it is still pending
			 */
408 mmr = ops.read_l_sw_ack();
409 msg_res = msg->swack_vec;
410 mr = (msg_res << UV_SW_ACK_NPENDING) | msg_res;
411 if (mmr & msg_res) {
412 stat->d_rcanceled++;
413 ops.write_l_sw_ack(mr);
414 }
415 }
416 }
417 return;
418}
419
/*
 * Use IPIs to get one cpu on each destination uvhub to run do_reset(),
 * releasing the resources held there for a given sending cpu number.
 */
424static void reset_with_ipi(struct pnmask *distribution, struct bau_control *bcp)
425{
426 int pnode;
427 int apnode;
428 int maskbits;
429 int sender = bcp->cpu;
430 cpumask_t *mask = bcp->uvhub_master->cpumask;
431 struct bau_control *smaster = bcp->socket_master;
432 struct reset_args reset_args;
433
434 reset_args.sender = sender;
435 cpus_clear(*mask);
436
437 maskbits = sizeof(struct pnmask) * BITSPERBYTE;
438
439 for (pnode = 0; pnode < maskbits; pnode++) {
440 int cpu;
441 if (!bau_uvhub_isset(pnode, distribution))
442 continue;
443 apnode = pnode + bcp->partition_base_pnode;
444 cpu = pnode_to_first_cpu(apnode, smaster);
445 cpu_set(cpu, *mask);
446 }

	/* IPI all cpus; preemption is already disabled */
449 smp_call_function_many(mask, do_reset, (void *)&reset_args, 1);
450 return;
451}
452
/*
 * Not to be confused with cycles_2_ns() from tsc.c; this gives a relative
 * number, not an absolute. It converts a duration in cycles to a duration in
 * ns.
 */
458static inline unsigned long long cycles_2_ns(unsigned long long cyc)
459{
460 struct cyc2ns_data *data = cyc2ns_read_begin();
461 unsigned long long ns;
462
463 ns = mul_u64_u32_shr(cyc, data->cyc2ns_mul, data->cyc2ns_shift);
464
465 cyc2ns_read_end(data);
466 return ns;
467}

/*
 * The reverse of the above: converts a duration in ns to a duration in cycles.
 */
472static inline unsigned long long ns_2_cycles(unsigned long long ns)
473{
474 struct cyc2ns_data *data = cyc2ns_read_begin();
475 unsigned long long cyc;
476
477 cyc = (ns << data->cyc2ns_shift) / data->cyc2ns_mul;
478
479 cyc2ns_read_end(data);
480 return cyc;
481}
482
483static inline unsigned long cycles_2_us(unsigned long long cyc)
484{
485 return cycles_2_ns(cyc) / NSEC_PER_USEC;
486}
487
488static inline cycles_t sec_2_cycles(unsigned long sec)
489{
490 return ns_2_cycles(sec * NSEC_PER_SEC);
491}
492
493static inline unsigned long long usec_2_cycles(unsigned long usec)
494{
495 return ns_2_cycles(usec * NSEC_PER_USEC);
496}
497
498
/*
 * Mark this uvhub as quiescing, so that no new broadcasts are started by
 * uv_flush_send_and_wait() until the IPI-style resource reset completes.
 */
503static inline void quiesce_local_uvhub(struct bau_control *hmaster)
504{
505 atom_asr(1, (struct atomic_short *)&hmaster->uvhub_quiesce);
506}
507
/*
 * mark this quiet-requestor as done
 */
511static inline void end_uvhub_quiesce(struct bau_control *hmaster)
512{
513 atom_asr(-1, (struct atomic_short *)&hmaster->uvhub_quiesce);
514}
515
516static unsigned long uv1_read_status(unsigned long mmr_offset, int right_shift)
517{
518 unsigned long descriptor_status;
519
520 descriptor_status = uv_read_local_mmr(mmr_offset);
521 descriptor_status >>= right_shift;
522 descriptor_status &= UV_ACT_STATUS_MASK;
523 return descriptor_status;
524}
525
/*
 * UV1: spin on this cpu's descriptor activation status until it goes idle;
 * return FLUSH_COMPLETE, FLUSH_RETRY_PLUGGED/TIMEOUT or FLUSH_GIVEUP.
 */
530static int uv1_wait_completion(struct bau_desc *bau_desc,
531 struct bau_control *bcp, long try)
532{
533 unsigned long descriptor_status;
534 cycles_t ttm;
535 u64 mmr_offset = bcp->status_mmr;
536 int right_shift = bcp->status_index;
537 struct ptc_stats *stat = bcp->statp;
538
539 descriptor_status = uv1_read_status(mmr_offset, right_shift);
540
541 while ((descriptor_status != DS_IDLE)) {
542
543
544
545
546
547
548 if (descriptor_status == DS_SOURCE_TIMEOUT) {
549 stat->s_stimeout++;
550 return FLUSH_GIVEUP;
551 } else if (descriptor_status == DS_DESTINATION_TIMEOUT) {
552 stat->s_dtimeout++;
553 ttm = get_cycles();
554
555
556
557
558
559
560
561 if (cycles_2_us(ttm - bcp->send_message) < timeout_us) {
562 bcp->conseccompletes = 0;
563 return FLUSH_RETRY_PLUGGED;
564 }
565
566 bcp->conseccompletes = 0;
567 return FLUSH_RETRY_TIMEOUT;
568 } else {
569
570
571
572 cpu_relax();
573 }
574 descriptor_status = uv1_read_status(mmr_offset, right_shift);
575 }
576 bcp->conseccompletes++;
577 return FLUSH_COMPLETE;
578}
579
/*
 * UV2 could have an extra bit of status in the ACTIVATION_STATUS_2 register.
 * But it is not currently used, so the two-bit status is shifted up one bit.
 */
584static unsigned long uv2_3_read_status(unsigned long offset, int rshft, int desc)
585{
586 unsigned long descriptor_status;
587
588 descriptor_status =
589 ((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK) << 1;
590 return descriptor_status;
591}
592

/*
 * Return whether this cpu's normally-used descriptor (indexed by uvhub_cpu
 * in UVH_LB_BAU_SB_ACTIVATION_STATUS_0) currently reads BUSY.
 */
601int normal_busy(struct bau_control *bcp)
602{
603 int cpu = bcp->uvhub_cpu;
604 int mmr_offset;
605 int right_shift;
606
607 mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
608 right_shift = cpu * UV_ACT_STATUS_SIZE;
609 return (((((read_lmmr(mmr_offset) >> right_shift) &
610 UV_ACT_STATUS_MASK)) << 1) == UV2H_DESC_BUSY);
611}
612
/*
 * Entered when a bau descriptor has gone into a permanent busy wait because
 * of a hardware bug.
 * Workaround the bug: mark this cpu busy and give up (fall back to IPIs).
 */
618int handle_uv2_busy(struct bau_control *bcp)
619{
620 struct ptc_stats *stat = bcp->statp;
621
622 stat->s_uv2_wars++;
623 bcp->busy = 1;
624 return FLUSH_GIVEUP;
625}
626
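/*
 * UV2/UV3: wait for completion of a broadcast message.  Unlike UV1, a
 * destination timeout or a long-lasting BUSY here returns FLUSH_GIVEUP,
 * falling back to IPI-based shootdown rather than retrying.
 */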
627static int uv2_3_wait_completion(struct bau_desc *bau_desc,
628 struct bau_control *bcp, long try)
629{
630 unsigned long descriptor_stat;
631 cycles_t ttm;
632 u64 mmr_offset = bcp->status_mmr;
633 int right_shift = bcp->status_index;
634 int desc = bcp->uvhub_cpu;
635 long busy_reps = 0;
636 struct ptc_stats *stat = bcp->statp;
637
638 descriptor_stat = uv2_3_read_status(mmr_offset, right_shift, desc);
639
640
641 while (descriptor_stat != UV2H_DESC_IDLE) {
642 if ((descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT)) {
643
644
645
646
647
648
649
650 stat->s_stimeout++;
651 return FLUSH_GIVEUP;
652 } else if (descriptor_stat == UV2H_DESC_DEST_TIMEOUT) {
653 ttm = get_cycles();
654
655
656
657
658
659
660
661
662
663
664 if (cycles_2_us(ttm - bcp->send_message) < timeout_us) {
665 bcp->conseccompletes = 0;
666 stat->s_plugged++;
667
668 return FLUSH_GIVEUP;
669 }
670 stat->s_dtimeout++;
671 bcp->conseccompletes = 0;
672
673 return FLUSH_GIVEUP;
674 } else {
675 busy_reps++;
676 if (busy_reps > 1000000) {
677
678 busy_reps = 0;
679 ttm = get_cycles();
680 if ((ttm - bcp->send_message) > bcp->timeout_interval)
681 return handle_uv2_busy(bcp);
682 }
683
684
685
686 cpu_relax();
687 }
688 descriptor_stat = uv2_3_read_status(mmr_offset, right_shift, desc);
689 }
690 bcp->conseccompletes++;
691 return FLUSH_COMPLETE;
692}
693
/*
 * Return the status of this cpu's current BAU message: the 2-bit status from
 * the status MMR, plus the extra low bit from ACTIVATION_STATUS_2 (UV4).
 */
698static u64 read_status(u64 status_mmr, int index, int desc)
699{
700 u64 stat;
701
702 stat = ((read_lmmr(status_mmr) >> index) & UV_ACT_STATUS_MASK) << 1;
703 stat |= (read_lmmr(UVH_LB_BAU_SB_ACTIVATION_STATUS_2) >> desc) & 0x1;
704
705 return stat;
706}
707
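/*
 * UV4: wait for completion of a broadcast message, mapping the combined
 * status (see read_status) to FLUSH_COMPLETE, FLUSH_RETRY_TIMEOUT,
 * FLUSH_RETRY_PLUGGED or FLUSH_GIVEUP.
 */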
708static int uv4_wait_completion(struct bau_desc *bau_desc,
709 struct bau_control *bcp, long try)
710{
711 struct ptc_stats *stat = bcp->statp;
712 u64 descriptor_stat;
713 u64 mmr = bcp->status_mmr;
714 int index = bcp->status_index;
715 int desc = bcp->uvhub_cpu;
716
717 descriptor_stat = read_status(mmr, index, desc);
718
719
720 while (descriptor_stat != UV2H_DESC_IDLE) {
721 switch (descriptor_stat) {
722 case UV2H_DESC_SOURCE_TIMEOUT:
723 stat->s_stimeout++;
724 return FLUSH_GIVEUP;
725
726 case UV2H_DESC_DEST_TIMEOUT:
727 stat->s_dtimeout++;
728 bcp->conseccompletes = 0;
729 return FLUSH_RETRY_TIMEOUT;
730
731 case UV2H_DESC_DEST_STRONG_NACK:
732 stat->s_plugged++;
733 bcp->conseccompletes = 0;
734 return FLUSH_RETRY_PLUGGED;
735
736 case UV2H_DESC_DEST_PUT_ERR:
737 bcp->conseccompletes = 0;
738 return FLUSH_GIVEUP;
739
740 default:
741
742 cpu_relax();
743 }
744 descriptor_stat = read_status(mmr, index, desc);
745 }
746 bcp->conseccompletes++;
747 return FLUSH_COMPLETE;
748}
749
/*
 * Our message was not making progress: the destination's software-ack
 * resources are all in use ("plugged").  Delay briefly; after plugsb4reset
 * consecutive plugged attempts, reset the destinations' resources with IPIs.
 */
755static void destination_plugged(struct bau_desc *bau_desc,
756 struct bau_control *bcp,
757 struct bau_control *hmaster, struct ptc_stats *stat)
758{
759 udelay(bcp->plugged_delay);
760 bcp->plugged_tries++;
761
762 if (bcp->plugged_tries >= bcp->plugsb4reset) {
763 bcp->plugged_tries = 0;
764
765 quiesce_local_uvhub(hmaster);
766
767 spin_lock(&hmaster->queue_lock);
768 reset_with_ipi(&bau_desc->distribution, bcp);
769 spin_unlock(&hmaster->queue_lock);
770
771 end_uvhub_quiesce(hmaster);
772
773 bcp->ipi_attempts++;
774 stat->s_resets_plug++;
775 }
776}
777
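/*
 * A destination timeout: throttle this uvhub to one concurrent activation
 * and, after timeoutsb4reset consecutive timeouts, reset the destinations'
 * resources with IPIs (as destination_plugged() does for plugs).
 */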
778static void destination_timeout(struct bau_desc *bau_desc,
779 struct bau_control *bcp, struct bau_control *hmaster,
780 struct ptc_stats *stat)
781{
782 hmaster->max_concurr = 1;
783 bcp->timeout_tries++;
784 if (bcp->timeout_tries >= bcp->timeoutsb4reset) {
785 bcp->timeout_tries = 0;
786
787 quiesce_local_uvhub(hmaster);
788
789 spin_lock(&hmaster->queue_lock);
790 reset_with_ipi(&bau_desc->distribution, bcp);
791 spin_unlock(&hmaster->queue_lock);
792
793 end_uvhub_quiesce(hmaster);
794
795 bcp->ipi_attempts++;
796 stat->s_resets_timeout++;
797 }
798}
799
/*
 * Stop all cpus on this uvhub from using the BAU for a period of time.
 * This is reversed by check_enable().
 */
804static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat)
805{
806 int tcpu;
807 struct bau_control *tbcp;
808 struct bau_control *hmaster;
809 cycles_t tm1;
810
811 hmaster = bcp->uvhub_master;
812 spin_lock(&hmaster->disable_lock);
813 if (!bcp->baudisabled) {
814 stat->s_bau_disabled++;
815 tm1 = get_cycles();
816 for_each_present_cpu(tcpu) {
817 tbcp = &per_cpu(bau_control, tcpu);
818 if (tbcp->uvhub_master == hmaster) {
819 tbcp->baudisabled = 1;
820 tbcp->set_bau_on_time =
821 tm1 + bcp->disabled_period;
822 }
823 }
824 }
825 spin_unlock(&hmaster->disable_lock);
826}
827
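/*
 * After a completed broadcast, once more than complete_threshold
 * consecutive completions have been seen, raise the uvhub's allowed
 * concurrency back toward max_concurr_const.
 */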
828static void count_max_concurr(int stat, struct bau_control *bcp,
829 struct bau_control *hmaster)
830{
831 bcp->plugged_tries = 0;
832 bcp->timeout_tries = 0;
833 if (stat != FLUSH_COMPLETE)
834 return;
835 if (bcp->conseccompletes <= bcp->complete_threshold)
836 return;
837 if (hmaster->max_concurr >= hmaster->max_concurr_const)
838 return;
839 hmaster->max_concurr++;
840}
841
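/*
 * Record send-side statistics for one broadcast.  Detects congestion
 * (average completion time above congested_cycles over more than cong_reps
 * requests) and repeated give-ups, and calls disable_for_period() when
 * either limit is exceeded.
 */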
842static void record_send_stats(cycles_t time1, cycles_t time2,
843 struct bau_control *bcp, struct ptc_stats *stat,
844 int completion_status, int try)
845{
846 cycles_t elapsed;
847
848 if (time2 > time1) {
849 elapsed = time2 - time1;
850 stat->s_time += elapsed;
851
852 if ((completion_status == FLUSH_COMPLETE) && (try == 1)) {
853 bcp->period_requests++;
854 bcp->period_time += elapsed;
855 if ((elapsed > congested_cycles) &&
856 (bcp->period_requests > bcp->cong_reps) &&
857 ((bcp->period_time / bcp->period_requests) >
858 congested_cycles)) {
859 stat->s_congested++;
860 disable_for_period(bcp, stat);
861 }
862 }
863 } else
864 stat->s_requestor--;
865
866 if (completion_status == FLUSH_COMPLETE && try > 1)
867 stat->s_retriesok++;
868 else if (completion_status == FLUSH_GIVEUP) {
869 stat->s_giveup++;
870 if (get_cycles() > bcp->period_end)
871 bcp->period_giveups = 0;
872 bcp->period_giveups++;
873 if (bcp->period_giveups == 1)
874 bcp->period_end = get_cycles() + bcp->disabled_period;
875 if (bcp->period_giveups > bcp->giveup_limit) {
876 disable_for_period(bcp, stat);
877 stat->s_giveuplimit++;
878 }
879 }
880}
881
/*
 * UV1: limit the number of concurrent broadcasts from this uvhub; spin
 * until active_descriptor_count is below max_concurr.
 */
886static void uv1_throttle(struct bau_control *hmaster, struct ptc_stats *stat)
887{
888 spinlock_t *lock = &hmaster->uvhub_lock;
889 atomic_t *v;
890
891 v = &hmaster->active_descriptor_count;
892 if (!atomic_inc_unless_ge(lock, v, hmaster->max_concurr)) {
893 stat->s_throttles++;
894 do {
895 cpu_relax();
896 } while (!atomic_inc_unless_ge(lock, v, hmaster->max_concurr));
897 }
898}
899
/*
 * Handle a 'plugged' or 'timed out' completion status.
 */
903static void handle_cmplt(int completion_status, struct bau_desc *bau_desc,
904 struct bau_control *bcp, struct bau_control *hmaster,
905 struct ptc_stats *stat)
906{
907 if (completion_status == FLUSH_RETRY_PLUGGED)
908 destination_plugged(bau_desc, bcp, hmaster, stat);
909 else if (completion_status == FLUSH_RETRY_TIMEOUT)
910 destination_timeout(bau_desc, bcp, hmaster, stat);
911}
912
/*
 * Send a broadcast and wait for it to complete.
 *
 * The flush_mask contains the cpus the broadcast is to be sent to including
 * cpus that are on the uvhub of the current cpu.
 *
 * Returns 0 if all flushing represented in the mask was done.
 * Returns 1 if it gives up entirely and the original cpu mask is to be
 * returned to the kernel.
 */
923int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp,
924 struct bau_desc *bau_desc)
925{
926 int seq_number = 0;
927 int completion_stat = 0;
928 int uv1 = 0;
929 long try = 0;
930 unsigned long index;
931 cycles_t time1;
932 cycles_t time2;
933 struct ptc_stats *stat = bcp->statp;
934 struct bau_control *hmaster = bcp->uvhub_master;
935 struct uv1_bau_msg_header *uv1_hdr = NULL;
936 struct uv2_3_bau_msg_header *uv2_3_hdr = NULL;
937
938 if (bcp->uvhub_version == UV_BAU_V1) {
939 uv1 = 1;
940 uv1_throttle(hmaster, stat);
941 }
942
943 while (hmaster->uvhub_quiesce)
944 cpu_relax();
945
946 time1 = get_cycles();
947 if (uv1)
948 uv1_hdr = &bau_desc->header.uv1_hdr;
949 else
		/* uv2 and uv3 */
951 uv2_3_hdr = &bau_desc->header.uv2_3_hdr;
952
953 do {
954 if (try == 0) {
955 if (uv1)
956 uv1_hdr->msg_type = MSG_REGULAR;
957 else
958 uv2_3_hdr->msg_type = MSG_REGULAR;
959 seq_number = bcp->message_number++;
960 } else {
961 if (uv1)
962 uv1_hdr->msg_type = MSG_RETRY;
963 else
964 uv2_3_hdr->msg_type = MSG_RETRY;
965 stat->s_retry_messages++;
966 }
967
968 if (uv1)
969 uv1_hdr->sequence = seq_number;
970 else
971 uv2_3_hdr->sequence = seq_number;
972 index = (1UL << AS_PUSH_SHIFT) | bcp->uvhub_cpu;
973 bcp->send_message = get_cycles();
974
975 write_mmr_activation(index);
976
977 try++;
978 completion_stat = ops.wait_completion(bau_desc, bcp, try);
979
980 handle_cmplt(completion_stat, bau_desc, bcp, hmaster, stat);
981
982 if (bcp->ipi_attempts >= bcp->ipi_reset_limit) {
983 bcp->ipi_attempts = 0;
984 stat->s_overipilimit++;
985 completion_stat = FLUSH_GIVEUP;
986 break;
987 }
988 cpu_relax();
989 } while ((completion_stat == FLUSH_RETRY_PLUGGED) ||
990 (completion_stat == FLUSH_RETRY_TIMEOUT));
991
992 time2 = get_cycles();
993
994 count_max_concurr(completion_stat, bcp, hmaster);
995
996 while (hmaster->uvhub_quiesce)
997 cpu_relax();
998
999 atomic_dec(&hmaster->active_descriptor_count);
1000
1001 record_send_stats(time1, time2, bcp, stat, completion_stat, try);
1002
1003 if (completion_stat == FLUSH_GIVEUP)
		/* FLUSH_GIVEUP will fall back to using IPI's for tlb flush */
1005 return 1;
1006 return 0;
1007}
1008
/*
 * The BAU is disabled for this uvhub.  If the disable period has expired,
 * re-enable it for every cpu on the uvhub and clear the period counters.
 * Return 0 if re-enabled, -1 if it is to stay disabled.
 */
1014static int check_enable(struct bau_control *bcp, struct ptc_stats *stat)
1015{
1016 int tcpu;
1017 struct bau_control *tbcp;
1018 struct bau_control *hmaster;
1019
1020 hmaster = bcp->uvhub_master;
1021 spin_lock(&hmaster->disable_lock);
1022 if (bcp->baudisabled && (get_cycles() >= bcp->set_bau_on_time)) {
1023 stat->s_bau_reenabled++;
1024 for_each_present_cpu(tcpu) {
1025 tbcp = &per_cpu(bau_control, tcpu);
1026 if (tbcp->uvhub_master == hmaster) {
1027 tbcp->baudisabled = 0;
1028 tbcp->period_requests = 0;
1029 tbcp->period_time = 0;
1030 tbcp->period_giveups = 0;
1031 }
1032 }
1033 spin_unlock(&hmaster->disable_lock);
1034 return 0;
1035 }
1036 spin_unlock(&hmaster->disable_lock);
1037 return -1;
1038}
1039
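/*
 * Count the targets of one shootdown request: cpus (local and remote) and
 * the distribution of targeted uvhubs.
 */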
1040static void record_send_statistics(struct ptc_stats *stat, int locals, int hubs,
1041 int remotes, struct bau_desc *bau_desc)
1042{
1043 stat->s_requestor++;
1044 stat->s_ntargcpu += remotes + locals;
1045 stat->s_ntargremotes += remotes;
1046 stat->s_ntarglocals += locals;
1047
1048
1049 hubs = bau_uvhub_weight(&bau_desc->distribution);
1050 if (locals) {
1051 stat->s_ntarglocaluvhub++;
1052 stat->s_ntargremoteuvhub += (hubs - 1);
1053 } else
1054 stat->s_ntargremoteuvhub += hubs;
1055
1056 stat->s_ntarguvhub += hubs;
1057
1058 if (hubs >= 16)
1059 stat->s_ntarguvhub16++;
1060 else if (hubs >= 8)
1061 stat->s_ntarguvhub8++;
1062 else if (hubs >= 4)
1063 stat->s_ntarguvhub4++;
1064 else if (hubs >= 2)
1065 stat->s_ntarguvhub2++;
1066 else
1067 stat->s_ntarguvhub1++;
1068}
1069
/*
 * Translate the cpus in flush_mask into uvhub distribution bits in the
 * activation descriptor, counting local and remote target cpus.
 */
1074static int set_distrib_bits(struct cpumask *flush_mask, struct bau_control *bcp,
1075 struct bau_desc *bau_desc, int *localsp, int *remotesp)
1076{
1077 int cpu;
1078 int pnode;
1079 int cnt = 0;
1080 struct hub_and_pnode *hpp;
1081
1082 for_each_cpu(cpu, flush_mask) {
1083
1084
1085
1086
1087
1088
1089 hpp = &bcp->socket_master->thp[cpu];
1090 pnode = hpp->pnode - bcp->partition_base_pnode;
1091 bau_uvhub_set(pnode, &bau_desc->distribution);
1092 cnt++;
1093 if (hpp->uvhub == bcp->uvhub)
1094 (*localsp)++;
1095 else
1096 (*remotesp)++;
1097 }
1098 if (!cnt)
1099 return 1;
1100 return 0;
1101}
1102
/**
 * uv_flush_tlb_others - globally purge translation cache of a virtual
 * address or all TLB's
 * @cpumask: mask of all cpu's in which the address is to be removed
 * @mm: mm_struct containing virtual address range
 * @start: start virtual address to be removed from TLB
 * @end: end virtual address to be removed from TLB
 * @cpu: the current cpu
 *
 * This is the entry point for initiating any UV global TLB shootdown.
 *
 * Purges the translation caches of all specified processors of the given
 * virtual address, or purges all TLB's on specified processors.
 *
 * The cpumask is converted into a uvhubmask of the uvhubs containing
 * those cpus.
 *
 * Returns NULL if all the flushing was accomplished here (possibly by
 * falling back to IPIs inside uv_flush_send_and_wait()).
 * Returns a pointer to the cpumask of cpus the caller must still flush by
 * other means (e.g. when the BAU is disabled, busy, or gave up).
 */
1128const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
1129 struct mm_struct *mm,
1130 unsigned long start,
1131 unsigned long end,
1132 unsigned int cpu)
1133{
1134 int locals = 0, remotes = 0, hubs = 0;
1135 struct bau_desc *bau_desc;
1136 struct cpumask *flush_mask;
1137 struct ptc_stats *stat;
1138 struct bau_control *bcp;
1139 unsigned long descriptor_status, status, address;
1140
1141 bcp = &per_cpu(bau_control, cpu);
1142
1143 if (bcp->nobau)
1144 return cpumask;
1145
1146 stat = bcp->statp;
1147 stat->s_enters++;
1148
1149 if (bcp->busy) {
1150 descriptor_status =
1151 read_lmmr(UVH_LB_BAU_SB_ACTIVATION_STATUS_0);
1152 status = ((descriptor_status >> (bcp->uvhub_cpu *
1153 UV_ACT_STATUS_SIZE)) & UV_ACT_STATUS_MASK) << 1;
1154 if (status == UV2H_DESC_BUSY)
1155 return cpumask;
1156 bcp->busy = 0;
1157 }

	/* bau was disabled due to slow response */
1160 if (bcp->baudisabled) {
1161 if (check_enable(bcp, stat)) {
1162 stat->s_ipifordisabled++;
1163 return cpumask;
1164 }
1165 }

	/*
	 * Each sending cpu has a per-cpu mask which it fills from the caller's
	 * cpu mask.  All cpus are converted to uvhubs and copied to the
	 * activation descriptor.
	 */
1172 flush_mask = (struct cpumask *)per_cpu(uv_flush_tlb_mask, cpu);
	/* don't actually do a shootdown of the local cpu */
1174 cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
1175
1176 if (cpu_isset(cpu, *cpumask))
1177 stat->s_ntargself++;
1178
1179 bau_desc = bcp->descriptor_base;
1180 bau_desc += (ITEMS_PER_DESC * bcp->uvhub_cpu);
1181 bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
1182 if (set_distrib_bits(flush_mask, bcp, bau_desc, &locals, &remotes))
1183 return NULL;
1184
1185 record_send_statistics(stat, locals, hubs, remotes, bau_desc);
1186
1187 if (!end || (end - start) <= PAGE_SIZE)
1188 address = start;
1189 else
1190 address = TLB_FLUSH_ALL;
1191
1192 switch (bcp->uvhub_version) {
1193 case UV_BAU_V1:
1194 case UV_BAU_V2:
1195 case UV_BAU_V3:
1196 bau_desc->payload.uv1_2_3.address = address;
1197 bau_desc->payload.uv1_2_3.sending_cpu = cpu;
1198 break;
1199 case UV_BAU_V4:
1200 bau_desc->payload.uv4.address = address;
1201 bau_desc->payload.uv4.sending_cpu = cpu;
1202 bau_desc->payload.uv4.qualifier = BAU_DESC_QUALIFIER;
1203 break;
1204 }

	/*
	 * uv_flush_send_and_wait returns 0 if all cpu's were messaged,
	 * or 1 if it gave up and the original cpumask should be returned.
	 */
1210 if (!uv_flush_send_and_wait(flush_mask, bcp, bau_desc))
1211 return NULL;
1212 else
1213 return cpumask;
1214}
1215
/*
 * Search the message queue for any 'other' unprocessed message with the
 * same software acknowledge resource bit vector.
 */
1220struct bau_pq_entry *find_another_by_swack(struct bau_pq_entry *msg,
1221 struct bau_control *bcp)
1222{
1223 struct bau_pq_entry *msg_next = msg + 1;
1224 unsigned char swack_vec = msg->swack_vec;
1225
1226 if (msg_next > bcp->queue_last)
1227 msg_next = bcp->queue_first;
1228 while (msg_next != msg) {
1229 if ((msg_next->canceled == 0) && (msg_next->replied_to == 0) &&
1230 (msg_next->swack_vec == swack_vec))
1231 return msg_next;
1232 msg_next++;
1233 if (msg_next > bcp->queue_last)
1234 msg_next = bcp->queue_first;
1235 }
1236 return NULL;
1237}
1238
/*
 * UV2 hardware can deliver a message whose software-acknowledge resource
 * bit is not (or is no longer) set in the MMR.  If another unprocessed
 * message owns the same resource, process this one without writing the
 * acknowledge register.
 */
1244void process_uv2_message(struct msg_desc *mdp, struct bau_control *bcp)
1245{
1246 unsigned long mmr_image;
1247 unsigned char swack_vec;
1248 struct bau_pq_entry *msg = mdp->msg;
1249 struct bau_pq_entry *other_msg;
1250
1251 mmr_image = ops.read_l_sw_ack();
1252 swack_vec = msg->swack_vec;
1253
1254 if ((swack_vec & mmr_image) == 0) {
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264 other_msg = find_another_by_swack(msg, bcp);
1265 if (other_msg) {
1266
1267
1268
1269
1270 bau_process_message(mdp, bcp, 0);
1271
1272
1273
1274
1275
1276 return;
1277 }
1278 }
1279
1280
1281
1282
1283
1284 bau_process_message(mdp, bcp, 1);
1285
1286 return;
1287}
1288
/*
 * The BAU message interrupt comes here (the vector is registered by
 * alloc_intr_gate() in uv_bau_init()).
 *
 * We received a broadcast assist message.
 *
 * Interrupts are disabled; this interrupt could represent
 * the receipt of several messages.
 *
 * All cores/threads on this hub get this interrupt.
 * The last one to see a message does the software ack.
 * (the resource will not be freed until all cpus on the hub have
 *  processed the message; hardware may time out the s/w ack)
 */
1303void uv_bau_message_interrupt(struct pt_regs *regs)
1304{
1305 int count = 0;
1306 cycles_t time_start;
1307 struct bau_pq_entry *msg;
1308 struct bau_control *bcp;
1309 struct ptc_stats *stat;
1310 struct msg_desc msgdesc;
1311
1312 ack_APIC_irq();
1313 time_start = get_cycles();
1314
1315 bcp = &per_cpu(bau_control, smp_processor_id());
1316 stat = bcp->statp;
1317
1318 msgdesc.queue_first = bcp->queue_first;
1319 msgdesc.queue_last = bcp->queue_last;
1320
1321 msg = bcp->bau_msg_head;
1322 while (msg->swack_vec) {
1323 count++;
1324
1325 msgdesc.msg_slot = msg - msgdesc.queue_first;
1326 msgdesc.msg = msg;
1327 if (bcp->uvhub_version == UV_BAU_V2)
1328 process_uv2_message(&msgdesc, bcp);
1329 else
1330
1331 bau_process_message(&msgdesc, bcp, 1);
1332
1333 msg++;
1334 if (msg > msgdesc.queue_last)
1335 msg = msgdesc.queue_first;
1336 bcp->bau_msg_head = msg;
1337 }
1338 stat->d_time += (get_cycles() - time_start);
1339 if (!count)
1340 stat->d_nomsg++;
1341 else if (count > 1)
1342 stat->d_multmsg++;
1343}
1344
/*
 * Each target uvhub (i.e. a uvhub that has cpu's) needs to have
 * shootdown message timeouts enabled.  The timeout does not cause
 * an interrupt, but causes an error message to be returned to
 * the sender.
 */
1351static void __init enable_timeouts(void)
1352{
1353 int uvhub;
1354 int nuvhubs;
1355 int pnode;
1356 unsigned long mmr_image;
1357
1358 nuvhubs = uv_num_possible_blades();
1359
1360 for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
1361 if (!uv_blade_nr_possible_cpus(uvhub))
1362 continue;
1363
1364 pnode = uv_blade_to_pnode(uvhub);
1365 mmr_image = read_mmr_misc_control(pnode);

		/*
		 * Set the timeout period and then lock it in, in three
		 * steps; captures and locks in the period.
		 *
		 * To program the period, the SOFT_ACK_MODE must be off.
		 */
1372 mmr_image &= ~(1L << SOFTACK_MSHIFT);
1373 write_mmr_misc_control(pnode, mmr_image);
1374
1375
1376
1377 mmr_image &= ~((unsigned long)0xf << SOFTACK_PSHIFT);
1378 mmr_image |= (SOFTACK_TIMEOUT_PERIOD << SOFTACK_PSHIFT);
1379 write_mmr_misc_control(pnode, mmr_image);
1380
1381
1382
1383
1384
1385
1386 mmr_image |= (1L << SOFTACK_MSHIFT);
1387 if (is_uv2_hub()) {
1388
1389
1390 mmr_image &= ~(1L << UV2_EXT_SHFT);
1391 } else if (is_uv3_hub()) {
1392 mmr_image &= ~(1L << PREFETCH_HINT_SHFT);
1393 mmr_image |= (1L << SB_STATUS_SHFT);
1394 }
1395 write_mmr_misc_control(pnode, mmr_image);
1396 }
1397}
1398
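/* seq_file iterator over cpu numbers for the ptc_statistics proc file */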
1399static void *ptc_seq_start(struct seq_file *file, loff_t *offset)
1400{
1401 if (*offset < num_possible_cpus())
1402 return offset;
1403 return NULL;
1404}
1405
1406static void *ptc_seq_next(struct seq_file *file, void *data, loff_t *offset)
1407{
1408 (*offset)++;
1409 if (*offset < num_possible_cpus())
1410 return offset;
1411 return NULL;
1412}
1413
1414static void ptc_seq_stop(struct seq_file *file, void *data)
1415{
1416}
1417
/*
 * Display the statistics through the ptc_statistics proc file
 * (UV_PTC_BASENAME); 'data' points to the cpu number.
 * Note: see the descriptions in stat_description[].
 */
1423static int ptc_seq_show(struct seq_file *file, void *data)
1424{
1425 struct ptc_stats *stat;
1426 struct bau_control *bcp;
1427 int cpu;
1428
1429 cpu = *(loff_t *)data;
1430 if (!cpu) {
1431 seq_printf(file,
1432 "# cpu bauoff sent stime self locals remotes ncpus localhub ");
1433 seq_printf(file,
1434 "remotehub numuvhubs numuvhubs16 numuvhubs8 ");
1435 seq_printf(file,
1436 "numuvhubs4 numuvhubs2 numuvhubs1 dto snacks retries ");
1437 seq_printf(file,
1438 "rok resetp resett giveup sto bz throt disable ");
1439 seq_printf(file,
1440 "enable wars warshw warwaits enters ipidis plugged ");
1441 seq_printf(file,
1442 "ipiover glim cong swack recv rtime all one mult ");
1443 seq_printf(file,
1444 "none retry canc nocan reset rcan\n");
1445 }
1446 if (cpu < num_possible_cpus() && cpu_online(cpu)) {
1447 bcp = &per_cpu(bau_control, cpu);
1448 if (bcp->nobau) {
1449 seq_printf(file, "cpu %d bau disabled\n", cpu);
1450 return 0;
1451 }
1452 stat = bcp->statp;
1453
1454 seq_printf(file,
1455 "cpu %d %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
1456 cpu, bcp->nobau, stat->s_requestor,
1457 cycles_2_us(stat->s_time),
1458 stat->s_ntargself, stat->s_ntarglocals,
1459 stat->s_ntargremotes, stat->s_ntargcpu,
1460 stat->s_ntarglocaluvhub, stat->s_ntargremoteuvhub,
1461 stat->s_ntarguvhub, stat->s_ntarguvhub16);
1462 seq_printf(file, "%ld %ld %ld %ld %ld %ld ",
1463 stat->s_ntarguvhub8, stat->s_ntarguvhub4,
1464 stat->s_ntarguvhub2, stat->s_ntarguvhub1,
1465 stat->s_dtimeout, stat->s_strongnacks);
1466 seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld ",
1467 stat->s_retry_messages, stat->s_retriesok,
1468 stat->s_resets_plug, stat->s_resets_timeout,
1469 stat->s_giveup, stat->s_stimeout,
1470 stat->s_busy, stat->s_throttles);
1471 seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
1472 stat->s_bau_disabled, stat->s_bau_reenabled,
1473 stat->s_uv2_wars, stat->s_uv2_wars_hw,
1474 stat->s_uv2_war_waits, stat->s_enters,
1475 stat->s_ipifordisabled, stat->s_plugged,
1476 stat->s_overipilimit, stat->s_giveuplimit,
1477 stat->s_congested);
1478
1479
1480 seq_printf(file,
1481 "%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n",
1482 ops.read_g_sw_ack(uv_cpu_to_pnode(cpu)),
1483 stat->d_requestee, cycles_2_us(stat->d_time),
1484 stat->d_alltlb, stat->d_onetlb, stat->d_multmsg,
1485 stat->d_nomsg, stat->d_retries, stat->d_canceled,
1486 stat->d_nocanceled, stat->d_resets,
1487 stat->d_rcanceled);
1488 }
1489 return 0;
1490}
1491
/*
 * Display the tunables through debugfs.
 */
1495static ssize_t tunables_read(struct file *file, char __user *userbuf,
1496 size_t count, loff_t *ppos)
1497{
1498 char *buf;
1499 int ret;
1500
1501 buf = kasprintf(GFP_KERNEL, "%s %s %s\n%d %d %d %d %d %d %d %d %d %d\n",
1502 "max_concur plugged_delay plugsb4reset timeoutsb4reset",
1503 "ipi_reset_limit complete_threshold congested_response_us",
1504 "congested_reps disabled_period giveup_limit",
1505 max_concurr, plugged_delay, plugsb4reset,
1506 timeoutsb4reset, ipi_reset_limit, complete_threshold,
1507 congested_respns_us, congested_reps, disabled_period,
1508 giveup_limit);
1509
1510 if (!buf)
1511 return -ENOMEM;
1512
1513 ret = simple_read_from_buffer(userbuf, count, ppos, buf, strlen(buf));
1514 kfree(buf);
1515 return ret;
1516}
1517
/*
 * Handle a write to the ptc_statistics proc file:
 *  "on"/"off": enable or disable use of the BAU
 *  0: display the meaning of each statistic
 * -1: reset the statistics on all cpus
 */
1523static ssize_t ptc_proc_write(struct file *file, const char __user *user,
1524 size_t count, loff_t *data)
1525{
1526 int cpu;
1527 int i;
1528 int elements;
1529 long input_arg;
1530 char optstr[64];
1531 struct ptc_stats *stat;
1532
1533 if (count == 0 || count > sizeof(optstr))
1534 return -EINVAL;
1535 if (copy_from_user(optstr, user, count))
1536 return -EFAULT;
1537 optstr[count - 1] = '\0';
1538
1539 if (!strcmp(optstr, "on")) {
1540 set_bau_on();
1541 return count;
1542 } else if (!strcmp(optstr, "off")) {
1543 set_bau_off();
1544 return count;
1545 }
1546
1547 if (kstrtol(optstr, 10, &input_arg) < 0) {
1548 pr_debug("%s is invalid\n", optstr);
1549 return -EINVAL;
1550 }
1551
1552 if (input_arg == 0) {
1553 elements = ARRAY_SIZE(stat_description);
1554 pr_debug("# cpu: cpu number\n");
1555 pr_debug("Sender statistics:\n");
1556 for (i = 0; i < elements; i++)
1557 pr_debug("%s\n", stat_description[i]);
1558 } else if (input_arg == -1) {
1559 for_each_present_cpu(cpu) {
1560 stat = &per_cpu(ptcstats, cpu);
1561 memset(stat, 0, sizeof(struct ptc_stats));
1562 }
1563 }
1564
1565 return count;
1566}
1567
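/* a simple unsigned-decimal parser for the tunables written through debugfs */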
1568static int local_atoi(const char *name)
1569{
1570 int val = 0;
1571
1572 for (;; name++) {
1573 switch (*name) {
1574 case '0' ... '9':
1575 val = 10*val+(*name-'0');
1576 break;
1577 default:
1578 return val;
1579 }
1580 }
1581}
1582
/*
 * Parse the numbers written to the bau_tunables debugfs file.
 * A zero resets a tunable to its compiled-in default.
 */
1587static int parse_tunables_write(struct bau_control *bcp, char *instr,
1588 int count)
1589{
1590 char *p;
1591 char *q;
1592 int cnt = 0;
1593 int val;
1594 int e = ARRAY_SIZE(tunables);
1595
1596 p = instr + strspn(instr, WHITESPACE);
1597 q = p;
1598 for (; *p; p = q + strspn(q, WHITESPACE)) {
1599 q = p + strcspn(p, WHITESPACE);
1600 cnt++;
1601 if (q == p)
1602 break;
1603 }
1604 if (cnt != e) {
1605 pr_info("bau tunable error: should be %d values\n", e);
1606 return -EINVAL;
1607 }
1608
1609 p = instr + strspn(instr, WHITESPACE);
1610 q = p;
1611 for (cnt = 0; *p; p = q + strspn(q, WHITESPACE), cnt++) {
1612 q = p + strcspn(p, WHITESPACE);
1613 val = local_atoi(p);
1614 switch (cnt) {
1615 case 0:
1616 if (val == 0) {
1617 max_concurr = MAX_BAU_CONCURRENT;
1618 max_concurr_const = MAX_BAU_CONCURRENT;
1619 continue;
1620 }
1621 if (val < 1 || val > bcp->cpus_in_uvhub) {
1622 pr_debug(
1623 "Error: BAU max concurrent %d is invalid\n",
1624 val);
1625 return -EINVAL;
1626 }
1627 max_concurr = val;
1628 max_concurr_const = val;
1629 continue;
1630 default:
1631 if (val == 0)
1632 *tunables[cnt].tunp = tunables[cnt].deflt;
1633 else
1634 *tunables[cnt].tunp = val;
1635 continue;
1636 }
1637 if (q == p)
1638 break;
1639 }
1640 return 0;
1641}
1642
/*
 * Handle a write to the bau_tunables debugfs file.
 */
1646static ssize_t tunables_write(struct file *file, const char __user *user,
1647 size_t count, loff_t *data)
1648{
1649 int cpu;
1650 int ret;
1651 char instr[100];
1652 struct bau_control *bcp;
1653
1654 if (count == 0 || count > sizeof(instr)-1)
1655 return -EINVAL;
1656 if (copy_from_user(instr, user, count))
1657 return -EFAULT;
1658
1659 instr[count] = '\0';
1660
1661 cpu = get_cpu();
1662 bcp = &per_cpu(bau_control, cpu);
1663 ret = parse_tunables_write(bcp, instr, count);
1664 put_cpu();
1665 if (ret)
1666 return ret;
1667
1668 for_each_present_cpu(cpu) {
1669 bcp = &per_cpu(bau_control, cpu);
1670 bcp->max_concurr = max_concurr;
1671 bcp->max_concurr_const = max_concurr;
1672 bcp->plugged_delay = plugged_delay;
1673 bcp->plugsb4reset = plugsb4reset;
1674 bcp->timeoutsb4reset = timeoutsb4reset;
1675 bcp->ipi_reset_limit = ipi_reset_limit;
1676 bcp->complete_threshold = complete_threshold;
1677 bcp->cong_response_us = congested_respns_us;
1678 bcp->cong_reps = congested_reps;
1679 bcp->disabled_period = sec_2_cycles(disabled_period);
1680 bcp->giveup_limit = giveup_limit;
1681 }
1682 return count;
1683}
1684
1685static const struct seq_operations uv_ptc_seq_ops = {
1686 .start = ptc_seq_start,
1687 .next = ptc_seq_next,
1688 .stop = ptc_seq_stop,
1689 .show = ptc_seq_show
1690};
1691
1692static int ptc_proc_open(struct inode *inode, struct file *file)
1693{
1694 return seq_open(file, &uv_ptc_seq_ops);
1695}
1696
1697static int tunables_open(struct inode *inode, struct file *file)
1698{
1699 return 0;
1700}
1701
1702static const struct file_operations proc_uv_ptc_operations = {
1703 .open = ptc_proc_open,
1704 .read = seq_read,
1705 .write = ptc_proc_write,
1706 .llseek = seq_lseek,
1707 .release = seq_release,
1708};
1709
1710static const struct file_operations tunables_fops = {
1711 .open = tunables_open,
1712 .read = tunables_read,
1713 .write = tunables_write,
1714 .llseek = default_llseek,
1715};
1716
1717static int __init uv_ptc_init(void)
1718{
1719 struct proc_dir_entry *proc_uv_ptc;
1720
1721 if (!is_uv_system())
1722 return 0;
1723
1724 proc_uv_ptc = proc_create(UV_PTC_BASENAME, 0444, NULL,
1725 &proc_uv_ptc_operations);
1726 if (!proc_uv_ptc) {
1727 pr_err("unable to create %s proc entry\n",
1728 UV_PTC_BASENAME);
1729 return -EINVAL;
1730 }
1731
1732 tunables_dir = debugfs_create_dir(UV_BAU_TUNABLES_DIR, NULL);
1733 if (!tunables_dir) {
1734 pr_err("unable to create debugfs directory %s\n",
1735 UV_BAU_TUNABLES_DIR);
1736 return -EINVAL;
1737 }
1738 tunables_file = debugfs_create_file(UV_BAU_TUNABLES_FILE, 0600,
1739 tunables_dir, NULL, &tunables_fops);
1740 if (!tunables_file) {
1741 pr_err("unable to create debugfs file %s\n",
1742 UV_BAU_TUNABLES_FILE);
1743 return -EINVAL;
1744 }
1745 return 0;
1746}
1747
/*
 * Initialize the sending side's sending buffers.
 */
1751static void activation_descriptor_init(int node, int pnode, int base_pnode)
1752{
1753 int i;
1754 int cpu;
1755 int uv1 = 0;
1756 unsigned long gpa;
1757 unsigned long m;
1758 unsigned long n;
1759 size_t dsize;
1760 struct bau_desc *bau_desc;
1761 struct bau_desc *bd2;
1762 struct uv1_bau_msg_header *uv1_hdr;
1763 struct uv2_3_bau_msg_header *uv2_3_hdr;
1764 struct bau_control *bcp;
1765
	/*
	 * each bau_desc is 64 bytes; there are 8 (ITEMS_PER_DESC)
	 * per cpu; and ADP_SZ cpu's worth per uvhub
	 */
1770 dsize = sizeof(struct bau_desc) * ADP_SZ * ITEMS_PER_DESC;
1771 bau_desc = kmalloc_node(dsize, GFP_KERNEL, node);
1772 BUG_ON(!bau_desc);
1773
1774 gpa = uv_gpa(bau_desc);
1775 n = uv_gpa_to_gnode(gpa);
1776 m = ops.bau_gpa_to_offset(gpa);
1777 if (is_uv1_hub())
1778 uv1 = 1;
1779
1780
1781 write_mmr_descriptor_base(pnode, (n << UV_DESC_PSHIFT | m));
1782
	/*
	 * Initialize all 8 (ITEMS_PER_DESC) descriptors for each cpu, even
	 * though only the first one is used.
	 */
1787 for (i = 0, bd2 = bau_desc; i < (ADP_SZ * ITEMS_PER_DESC); i++, bd2++) {
1788 memset(bd2, 0, sizeof(struct bau_desc));
1789 if (uv1) {
1790 uv1_hdr = &bd2->header.uv1_hdr;
1791 uv1_hdr->swack_flag = 1;
			/*
			 * The base_dest_nasid set in the message header
			 * is the nasid of the first uvhub in the partition.
			 * The bit map will indicate destination pnode numbers
			 * relative to that base. They may not be consecutive
			 * if nasid striding is being used.
			 */
1799 uv1_hdr->base_dest_nasid =
1800 UV_PNODE_TO_NASID(base_pnode);
1801 uv1_hdr->dest_subnodeid = UV_LB_SUBNODEID;
1802 uv1_hdr->command = UV_NET_ENDPOINT_INTD;
1803 uv1_hdr->int_both = 1;
			/*
			 * all others need to be set to zero:
			 *   fairness chaining multilevel count replied_to
			 */
1808 } else {
			/*
			 * BIOS uses legacy mode, but uv2 and uv3 hardware always
			 * uses native mode for selective broadcasts.
			 */
1813 uv2_3_hdr = &bd2->header.uv2_3_hdr;
1814 uv2_3_hdr->swack_flag = 1;
1815 uv2_3_hdr->base_dest_nasid =
1816 UV_PNODE_TO_NASID(base_pnode);
1817 uv2_3_hdr->dest_subnodeid = UV_LB_SUBNODEID;
1818 uv2_3_hdr->command = UV_NET_ENDPOINT_INTD;
1819 }
1820 }
1821 for_each_present_cpu(cpu) {
1822 if (pnode != uv_blade_to_pnode(uv_cpu_to_blade_id(cpu)))
1823 continue;
1824 bcp = &per_cpu(bau_control, cpu);
1825 bcp->descriptor_base = bau_desc;
1826 }
1827}
1828
/*
 * initialize the destination side's receiving buffers
 * entered for each uvhub in the partition
 * - node is first node (kernel memory notion) on the uvhub
 * - pnode is the uvhub's physical identifier
 */
1835static void pq_init(int node, int pnode)
1836{
1837 int cpu;
1838 size_t plsize;
1839 char *cp;
1840 void *vp;
1841 unsigned long gnode, first, last, tail;
1842 struct bau_pq_entry *pqp;
1843 struct bau_control *bcp;
1844
1845 plsize = (DEST_Q_SIZE + 1) * sizeof(struct bau_pq_entry);
1846 vp = kmalloc_node(plsize, GFP_KERNEL, node);
1847 pqp = (struct bau_pq_entry *)vp;
1848 BUG_ON(!pqp);
1849
1850 cp = (char *)pqp + 31;
1851 pqp = (struct bau_pq_entry *)(((unsigned long)cp >> 5) << 5);
1852
1853 for_each_present_cpu(cpu) {
1854 if (pnode != uv_cpu_to_pnode(cpu))
1855 continue;
1856
1857 bcp = &per_cpu(bau_control, cpu);
1858 bcp->queue_first = pqp;
1859 bcp->bau_msg_head = pqp;
1860 bcp->queue_last = pqp + (DEST_Q_SIZE - 1);
1861 }
1862
1863 first = ops.bau_gpa_to_offset(uv_gpa(pqp));
1864 last = ops.bau_gpa_to_offset(uv_gpa(pqp + (DEST_Q_SIZE - 1)));

	/*
	 * Pre UV4, the gnode is required to locate the payload queue
	 * and the payload queue tail must be initialized by the kernel.
	 */
1870 bcp = &per_cpu(bau_control, smp_processor_id());
1871 if (bcp->uvhub_version <= UV_BAU_V3) {
1872 tail = first;
1873 gnode = uv_gpa_to_gnode(uv_gpa(pqp));
1874 first = (gnode << UV_PAYLOADQ_GNODE_SHIFT) | tail;
1875 write_mmr_payload_tail(pnode, tail);
1876 }
1877
1878 ops.write_payload_first(pnode, first);
1879 ops.write_payload_last(pnode, last);

	/* in effect, all msg_type's are set to MSG_NOOP */
1882 memset(pqp, 0, sizeof(struct bau_pq_entry) * DEST_Q_SIZE);
1883}
1884
/*
 * Initialization of each UV hub's structures
 */
1888static void __init init_uvhub(int uvhub, int vector, int base_pnode)
1889{
1890 int node;
1891 int pnode;
1892 unsigned long apicid;
1893
1894 node = uvhub_to_first_node(uvhub);
1895 pnode = uv_blade_to_pnode(uvhub);
1896
1897 activation_descriptor_init(node, pnode, base_pnode);
1898
1899 pq_init(node, pnode);

	/*
	 * The below initialization can't be in firmware because the
	 * messaging IRQ will be determined by the OS.
	 */
1904 apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits;
1905 write_mmr_data_config(pnode, ((apicid << 32) | vector));
1906}
1907
/*
 * Calculate the destination-side timeout period (in usec) from the MMRs
 * programmed by the BIOS: UVH_AGING_PRESCALE_SEL and UVH_TRANSACTION_TIMEOUT
 * on UV1, or UVH_LB_BAU_MISC_CONTROL on UV2/UV3.
 */
1913static int calculate_destination_timeout(void)
1914{
1915 unsigned long mmr_image;
1916 int mult1;
1917 int mult2;
1918 int index;
1919 int base;
1920 int ret;
1921 unsigned long ts_ns;
1922
1923 if (is_uv1_hub()) {
1924 mult1 = SOFTACK_TIMEOUT_PERIOD & BAU_MISC_CONTROL_MULT_MASK;
1925 mmr_image = uv_read_local_mmr(UVH_AGING_PRESCALE_SEL);
1926 index = (mmr_image >> BAU_URGENCY_7_SHIFT) & BAU_URGENCY_7_MASK;
1927 mmr_image = uv_read_local_mmr(UVH_TRANSACTION_TIMEOUT);
1928 mult2 = (mmr_image >> BAU_TRANS_SHIFT) & BAU_TRANS_MASK;
1929 ts_ns = timeout_base_ns[index];
1930 ts_ns *= (mult1 * mult2);
1931 ret = ts_ns / 1000;
1932 } else {
		/* same destination timeout for uv2 and uv3 */
		/* 4 bits  0/1 for 10/80us base, 3 bits of multiplier */
1935 mmr_image = uv_read_local_mmr(UVH_LB_BAU_MISC_CONTROL);
1936 mmr_image = (mmr_image & UV_SA_MASK) >> UV_SA_SHFT;
1937 if (mmr_image & (1L << UV2_ACK_UNITS_SHFT))
1938 base = 80;
1939 else
1940 base = 10;
1941 mult1 = mmr_image & UV2_ACK_MASK;
1942 ret = mult1 * base;
1943 }
1944 return ret;
1945}
1946
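/*
 * Seed every cpu's bau_control with the current (default) tunable values.
 */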
1947static void __init init_per_cpu_tunables(void)
1948{
1949 int cpu;
1950 struct bau_control *bcp;
1951
1952 for_each_present_cpu(cpu) {
1953 bcp = &per_cpu(bau_control, cpu);
1954 bcp->baudisabled = 0;
1955 if (nobau)
1956 bcp->nobau = true;
1957 bcp->statp = &per_cpu(ptcstats, cpu);
1958
1959 bcp->timeout_interval = usec_2_cycles(2*timeout_us);
1960 bcp->max_concurr = max_concurr;
1961 bcp->max_concurr_const = max_concurr;
1962 bcp->plugged_delay = plugged_delay;
1963 bcp->plugsb4reset = plugsb4reset;
1964 bcp->timeoutsb4reset = timeoutsb4reset;
1965 bcp->ipi_reset_limit = ipi_reset_limit;
1966 bcp->complete_threshold = complete_threshold;
1967 bcp->cong_response_us = congested_respns_us;
1968 bcp->cong_reps = congested_reps;
1969 bcp->disabled_period = sec_2_cycles(disabled_period);
1970 bcp->giveup_limit = giveup_limit;
1971 spin_lock_init(&bcp->queue_lock);
1972 spin_lock_init(&bcp->uvhub_lock);
1973 spin_lock_init(&bcp->disable_lock);
1974 }
1975}
1976
/*
 * Gather cpu topology: record each cpu's pnode, uvhub and socket in
 * uvhub_descs[] and set its bit in uvhub_mask.
 */
1980static int __init get_cpu_topology(int base_pnode,
1981 struct uvhub_desc *uvhub_descs,
1982 unsigned char *uvhub_mask)
1983{
1984 int cpu;
1985 int pnode;
1986 int uvhub;
1987 int socket;
1988 struct bau_control *bcp;
1989 struct uvhub_desc *bdp;
1990 struct socket_desc *sdp;
1991
1992 for_each_present_cpu(cpu) {
1993 bcp = &per_cpu(bau_control, cpu);
1994
1995 memset(bcp, 0, sizeof(struct bau_control));
1996
1997 pnode = uv_cpu_hub_info(cpu)->pnode;
1998 if ((pnode - base_pnode) >= UV_DISTRIBUTION_SIZE) {
1999 pr_emerg(
2000 "cpu %d pnode %d-%d beyond %d; BAU disabled\n",
2001 cpu, pnode, base_pnode, UV_DISTRIBUTION_SIZE);
2002 return 1;
2003 }
2004
2005 bcp->osnode = cpu_to_node(cpu);
2006 bcp->partition_base_pnode = base_pnode;
2007
2008 uvhub = uv_cpu_hub_info(cpu)->numa_blade_id;
2009 *(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8));
2010 bdp = &uvhub_descs[uvhub];
2011
2012 bdp->num_cpus++;
2013 bdp->uvhub = uvhub;
2014 bdp->pnode = pnode;
2015
2016
2017
2018 socket = bcp->osnode & 1;
2019 bdp->socket_mask |= (1 << socket);
2020 sdp = &bdp->socket[socket];
2021 sdp->cpu_number[sdp->num_cpus] = cpu;
2022 sdp->num_cpus++;
2023 if (sdp->num_cpus > MAX_CPUS_PER_SOCKET) {
2024 pr_emerg("%d cpus per socket invalid\n",
2025 sdp->num_cpus);
2026 return 1;
2027 }
2028 }
2029 return 0;
2030}
2031
/*
 * Each socket is to get a local array of pnodes/hubs.
 */
2035static void make_per_cpu_thp(struct bau_control *smaster)
2036{
2037 int cpu;
2038 size_t hpsz = sizeof(struct hub_and_pnode) * num_possible_cpus();
2039
2040 smaster->thp = kmalloc_node(hpsz, GFP_KERNEL, smaster->osnode);
2041 memset(smaster->thp, 0, hpsz);
2042 for_each_present_cpu(cpu) {
2043 smaster->thp[cpu].pnode = uv_cpu_hub_info(cpu)->pnode;
2044 smaster->thp[cpu].uvhub = uv_cpu_hub_info(cpu)->numa_blade_id;
2045 }
2046}
2047
/*
 * Each uvhub is to get a local cpumask.
 */
2051static void make_per_hub_cpumask(struct bau_control *hmaster)
2052{
2053 int sz = sizeof(cpumask_t);
2054
2055 hmaster->cpumask = kzalloc_node(sz, GFP_KERNEL, hmaster->osnode);
2056}
2057
/*
 * Initialize all the per_cpu information for the cpu's on a given socket,
 * given what has been gathered into the socket_desc struct.
 * And reports the chosen hub and socket masters back to the caller.
 */
2063static int scan_sock(struct socket_desc *sdp, struct uvhub_desc *bdp,
2064 struct bau_control **smasterp,
2065 struct bau_control **hmasterp)
2066{
2067 int i, cpu, uvhub_cpu;
2068 struct bau_control *bcp;
2069
2070 for (i = 0; i < sdp->num_cpus; i++) {
2071 cpu = sdp->cpu_number[i];
2072 bcp = &per_cpu(bau_control, cpu);
2073 bcp->cpu = cpu;
2074 if (i == 0) {
2075 *smasterp = bcp;
2076 if (!(*hmasterp))
2077 *hmasterp = bcp;
2078 }
2079 bcp->cpus_in_uvhub = bdp->num_cpus;
2080 bcp->cpus_in_socket = sdp->num_cpus;
2081 bcp->socket_master = *smasterp;
2082 bcp->uvhub = bdp->uvhub;
2083 if (is_uv1_hub())
2084 bcp->uvhub_version = UV_BAU_V1;
2085 else if (is_uv2_hub())
2086 bcp->uvhub_version = UV_BAU_V2;
2087 else if (is_uv3_hub())
2088 bcp->uvhub_version = UV_BAU_V3;
2089 else if (is_uv4_hub())
2090 bcp->uvhub_version = UV_BAU_V4;
2091 else {
2092 pr_emerg("uvhub version not 1, 2, 3, or 4\n");
2093 return 1;
2094 }
2095 bcp->uvhub_master = *hmasterp;
2096 uvhub_cpu = uv_cpu_blade_processor_id(cpu);
2097 bcp->uvhub_cpu = uvhub_cpu;

		/*
		 * The activation status of each cpu is a 2-bit field; the
		 * first UV_CPUS_PER_AS cpus are in ACTIVATION_STATUS_0,
		 * the rest in ACTIVATION_STATUS_1.
		 */
2103 if (uvhub_cpu < UV_CPUS_PER_AS) {
2104 bcp->status_mmr = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
2105 bcp->status_index = uvhub_cpu * UV_ACT_STATUS_SIZE;
2106 } else {
2107 bcp->status_mmr = UVH_LB_BAU_SB_ACTIVATION_STATUS_1;
2108 bcp->status_index = (uvhub_cpu - UV_CPUS_PER_AS)
2109 * UV_ACT_STATUS_SIZE;
2110 }
2111
2112 if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) {
2113 pr_emerg("%d cpus per uvhub invalid\n",
2114 bcp->uvhub_cpu);
2115 return 1;
2116 }
2117 }
2118 return 0;
2119}
2120
/*
 * Summarize the bau_control structures on each socket of each uvhub.
 */
2124static int __init summarize_uvhub_sockets(int nuvhubs,
2125 struct uvhub_desc *uvhub_descs,
2126 unsigned char *uvhub_mask)
2127{
2128 int socket;
2129 int uvhub;
2130 unsigned short socket_mask;
2131
2132 for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
2133 struct uvhub_desc *bdp;
2134 struct bau_control *smaster = NULL;
2135 struct bau_control *hmaster = NULL;
2136
2137 if (!(*(uvhub_mask + (uvhub/8)) & (1 << (uvhub%8))))
2138 continue;
2139
2140 bdp = &uvhub_descs[uvhub];
2141 socket_mask = bdp->socket_mask;
2142 socket = 0;
2143 while (socket_mask) {
2144 struct socket_desc *sdp;
2145 if ((socket_mask & 1)) {
2146 sdp = &bdp->socket[socket];
2147 if (scan_sock(sdp, bdp, &smaster, &hmaster))
2148 return 1;
2149 make_per_cpu_thp(smaster);
2150 }
2151 socket++;
2152 socket_mask = (socket_mask >> 1);
2153 }
2154 make_per_hub_cpumask(hmaster);
2155 }
2156 return 0;
2157}
2158
/*
 * initialize the bau_control structure for each cpu
 */
2162static int __init init_per_cpu(int nuvhubs, int base_part_pnode)
2163{
2164 unsigned char *uvhub_mask;
2165 void *vp;
2166 struct uvhub_desc *uvhub_descs;
2167
2168 if (is_uv3_hub() || is_uv2_hub() || is_uv1_hub())
2169 timeout_us = calculate_destination_timeout();
2170
2171 vp = kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL);
2172 uvhub_descs = (struct uvhub_desc *)vp;
2173 memset(uvhub_descs, 0, nuvhubs * sizeof(struct uvhub_desc));
2174 uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL);
2175
2176 if (get_cpu_topology(base_part_pnode, uvhub_descs, uvhub_mask))
2177 goto fail;
2178
2179 if (summarize_uvhub_sockets(nuvhubs, uvhub_descs, uvhub_mask))
2180 goto fail;
2181
2182 kfree(uvhub_descs);
2183 kfree(uvhub_mask);
2184 init_per_cpu_tunables();
2185 return 0;
2186
2187fail:
2188 kfree(uvhub_descs);
2189 kfree(uvhub_mask);
2190 return 1;
2191}
2192
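/*
 * Per-hub-generation MMR accessors and completion-wait methods; one of
 * these tables is copied into 'ops' by uv_bau_init().
 */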
2193static const struct bau_operations uv1_bau_ops __initconst = {
2194 .bau_gpa_to_offset = uv_gpa_to_offset,
2195 .read_l_sw_ack = read_mmr_sw_ack,
2196 .read_g_sw_ack = read_gmmr_sw_ack,
2197 .write_l_sw_ack = write_mmr_sw_ack,
2198 .write_g_sw_ack = write_gmmr_sw_ack,
2199 .write_payload_first = write_mmr_payload_first,
2200 .write_payload_last = write_mmr_payload_last,
2201 .wait_completion = uv1_wait_completion,
2202};
2203
2204static const struct bau_operations uv2_3_bau_ops __initconst = {
2205 .bau_gpa_to_offset = uv_gpa_to_offset,
2206 .read_l_sw_ack = read_mmr_sw_ack,
2207 .read_g_sw_ack = read_gmmr_sw_ack,
2208 .write_l_sw_ack = write_mmr_sw_ack,
2209 .write_g_sw_ack = write_gmmr_sw_ack,
2210 .write_payload_first = write_mmr_payload_first,
2211 .write_payload_last = write_mmr_payload_last,
2212 .wait_completion = uv2_3_wait_completion,
2213};
2214
2215static const struct bau_operations uv4_bau_ops __initconst = {
2216 .bau_gpa_to_offset = uv_gpa_to_soc_phys_ram,
2217 .read_l_sw_ack = read_mmr_proc_sw_ack,
2218 .read_g_sw_ack = read_gmmr_proc_sw_ack,
2219 .write_l_sw_ack = write_mmr_proc_sw_ack,
2220 .write_g_sw_ack = write_gmmr_proc_sw_ack,
2221 .write_payload_first = write_mmr_proc_payload_first,
2222 .write_payload_last = write_mmr_proc_payload_last,
2223 .wait_completion = uv4_wait_completion,
2224};
2225
/*
 * Initialization of BAU-related structures
 */
2229static int __init uv_bau_init(void)
2230{
2231 int uvhub;
2232 int pnode;
2233 int nuvhubs;
2234 int cur_cpu;
2235 int cpus;
2236 int vector;
2237 cpumask_var_t *mask;
2238
2239 if (!is_uv_system())
2240 return 0;
2241
2242 if (is_uv4_hub())
2243 ops = uv4_bau_ops;
2244 else if (is_uv3_hub())
2245 ops = uv2_3_bau_ops;
2246 else if (is_uv2_hub())
2247 ops = uv2_3_bau_ops;
2248 else if (is_uv1_hub())
2249 ops = uv1_bau_ops;
2250
2251 for_each_possible_cpu(cur_cpu) {
2252 mask = &per_cpu(uv_flush_tlb_mask, cur_cpu);
2253 zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cur_cpu));
2254 }
2255
2256 nuvhubs = uv_num_possible_blades();
2257 congested_cycles = usec_2_cycles(congested_respns_us);
2258
2259 uv_base_pnode = 0x7fffffff;
2260 for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
2261 cpus = uv_blade_nr_possible_cpus(uvhub);
2262 if (cpus && (uv_blade_to_pnode(uvhub) < uv_base_pnode))
2263 uv_base_pnode = uv_blade_to_pnode(uvhub);
2264 }

	/* software timeouts are not supported on UV4 */
2267 if (is_uv3_hub() || is_uv2_hub() || is_uv1_hub())
2268 enable_timeouts();
2269
2270 if (init_per_cpu(nuvhubs, uv_base_pnode)) {
2271 set_bau_off();
2272 nobau_perm = 1;
2273 return 0;
2274 }
2275
2276 vector = UV_BAU_MESSAGE;
2277 for_each_possible_blade(uvhub) {
2278 if (uv_blade_nr_possible_cpus(uvhub))
2279 init_uvhub(uvhub, vector, uv_base_pnode);
2280 }
2281
2282 alloc_intr_gate(vector, uv_bau_message_intr1);
2283
2284 for_each_possible_blade(uvhub) {
2285 if (uv_blade_nr_possible_cpus(uvhub)) {
2286 unsigned long val;
2287 unsigned long mmr;
2288 pnode = uv_blade_to_pnode(uvhub);
2289
2290 val = 1L << 63;
2291 write_gmmr_activation(pnode, val);
2292 mmr = 1;
2293 if (!is_uv1_hub())
2294 write_mmr_data_broadcast(pnode, mmr);
2295 }
2296 }
2297
2298 return 0;
2299}
2300core_initcall(uv_bau_init);
2301fs_initcall(uv_ptc_init);
2302