// SPDX-License-Identifier: GPL-2.0
/*
 *  S390 version
 *    Copyright IBM Corp. 1999
 *    Author(s): Hartmut Penner (hp@de.ibm.com)
 *               Ulrich Weigand (uweigand@de.ibm.com)
 *
 *  Derived from "arch/i386/mm/fault.c"
 *    Copyright (C) 1995  Linus Torvalds
 */

#include <linux/kernel_stat.h>
#include <linux/perf_event.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/compat.h>
#include <linux/smp.h>
#include <linux/kdebug.h>
#include <linux/init.h>
#include <linux/console.h>
#include <linux/extable.h>
#include <linux/hardirq.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/hugetlb.h>
#include <asm/asm-offsets.h>
#include <asm/diag.h>
#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/irq.h>
#include <asm/mmu_context.h>
#include <asm/facility.h>
#include "../kernel/entry.h"

#define __FAIL_ADDR_MASK -4096L
#define __SUBCODE_MASK 0x0600
#define __PF_RES_FIELD 0x8000000000000000ULL

#define VM_FAULT_BADCONTEXT	0x010000
#define VM_FAULT_BADMAP		0x020000
#define VM_FAULT_BADACCESS	0x040000
#define VM_FAULT_SIGNAL		0x080000
#define VM_FAULT_PFAULT		0x100000

enum fault_type {
	KERNEL_FAULT,
	USER_FAULT,
	VDSO_FAULT,
	GMAP_FAULT,
};

static unsigned long store_indication __read_mostly;

static int __init fault_init(void)
{
	/* enable the TEID store-indication bits if facility 75 is present */
	if (test_facility(75))
		store_indication = 0xc00;
	return 0;
}
early_initcall(fault_init);
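
/*
 * Background sketch: with facility 75 (presumably the access-exception
 * fetch/store indication facility) installed, the translation-exception
 * identification (TEID) carries a fetch/store indication in the bits
 * selected by store_indication (0xc00). do_exception() below treats the
 * value 0x400 as "the faulting access was a store" and requests a
 * writable fault, literally:
 *
 *	if (access == VM_WRITE ||
 *	    (trans_exc_code & store_indication) == 0x400)
 *		flags |= FAULT_FLAG_WRITE;
 */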

static inline int notify_page_fault(struct pt_regs *regs)
{
	int ret = 0;

	/* kprobe_running() needs a stable smp_processor_id() */
	if (kprobes_built_in() && !user_mode(regs)) {
		preempt_disable();
		if (kprobe_running() && kprobe_fault_handler(regs, 14))
			ret = 1;
		preempt_enable();
	}
	return ret;
}
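
/*
 * Illustrative sketch (not part of this file, all names hypothetical):
 * a kprobe registered as below would have its fault_handler invoked via
 * notify_page_fault() if the probed kernel code triggers a page fault:
 *
 *	static int my_fault_handler(struct kprobe *p, struct pt_regs *regs,
 *				    int trapnr)
 *	{
 *		return 0;	// 0: fall back to normal fault handling
 *	}
 *
 *	static struct kprobe my_kp = {
 *		.symbol_name	= "some_traced_function",
 *		.fault_handler	= my_fault_handler,
 *	};
 *	register_kprobe(&my_kp);
 */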

/*
 * Unlock any spinlocks which will prevent us from getting the
 * message out.
 */
void bust_spinlocks(int yes)
{
	if (yes) {
		oops_in_progress = 1;
	} else {
		int loglevel_save = console_loglevel;

		console_unblank();
		oops_in_progress = 0;
		/*
		 * OK, the message is on the console.  Now we call printk()
		 * without oops_in_progress set so that printk() will give
		 * klogd a poke.  Hold onto your hats...
		 */
		console_loglevel = 15;
		printk(" ");
		console_loglevel = loglevel_save;
	}
}

/*
 * Find out which address space caused the exception.
 */
static inline enum fault_type get_fault_type(struct pt_regs *regs)
{
	unsigned long trans_exc_code;

	trans_exc_code = regs->int_parm_long & 3;
	if (likely(trans_exc_code == 0)) {
		/* primary space exception */
		if (IS_ENABLED(CONFIG_PGSTE) &&
		    test_pt_regs_flag(regs, PIF_GUEST_FAULT))
			return GMAP_FAULT;
		if (current->thread.mm_segment == USER_DS)
			return USER_FAULT;
		return KERNEL_FAULT;
	}
	if (trans_exc_code == 2) {
		/* secondary space exception */
		if (current->thread.mm_segment & 1) {
			if (current->thread.mm_segment == USER_DS_SACF)
				return USER_FAULT;
			return KERNEL_FAULT;
		}
		return VDSO_FAULT;
	}
	if (trans_exc_code == 1) {
		/* access register mode -> user space fault */
		return USER_FAULT;
	}
	/* home space exception -> kernel fault */
	return KERNEL_FAULT;
}
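
/*
 * Worked example (the TEID value is hypothetical): int_parm_long ==
 * 0x0000020000002002 has its low two bits set to 2, i.e. a
 * secondary-space access. An odd mm_segment value then means the kernel
 * temporarily switched address spaces (USER_DS_SACF classifies as
 * USER_FAULT, anything else as KERNEL_FAULT), while an even value means
 * the fault came from vdso code (VDSO_FAULT).
 */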

static int bad_address(void *p)
{
	unsigned long dummy;

	return probe_kernel_address((unsigned long *)p, dummy);
}

static void dump_pagetable(unsigned long asce, unsigned long address)
{
	unsigned long *table = __va(asce & _ASCE_ORIGIN);

	pr_alert("AS:%016lx ", asce);
	switch (asce & _ASCE_TYPE_MASK) {
	case _ASCE_TYPE_REGION1:
		table += (address & _REGION1_INDEX) >> _REGION1_SHIFT;
		if (bad_address(table))
			goto bad;
		pr_cont("R1:%016lx ", *table);
		if (*table & _REGION_ENTRY_INVALID)
			goto out;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		/* fallthrough */
	case _ASCE_TYPE_REGION2:
		table += (address & _REGION2_INDEX) >> _REGION2_SHIFT;
		if (bad_address(table))
			goto bad;
		pr_cont("R2:%016lx ", *table);
		if (*table & _REGION_ENTRY_INVALID)
			goto out;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		/* fallthrough */
	case _ASCE_TYPE_REGION3:
		table += (address & _REGION3_INDEX) >> _REGION3_SHIFT;
		if (bad_address(table))
			goto bad;
		pr_cont("R3:%016lx ", *table);
		if (*table & (_REGION_ENTRY_INVALID | _REGION3_ENTRY_LARGE))
			goto out;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		/* fallthrough */
	case _ASCE_TYPE_SEGMENT:
		table += (address & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
		if (bad_address(table))
			goto bad;
		pr_cont("S:%016lx ", *table);
		if (*table & (_SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_LARGE))
			goto out;
		table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
	}
	table += (address & _PAGE_INDEX) >> _PAGE_SHIFT;
	if (bad_address(table))
		goto bad;
	pr_cont("P:%016lx ", *table);
out:
	pr_cont("\n");
	return;
bad:
	pr_cont("BAD\n");
}
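
/*
 * Example output for a three-level walk that resolves down to the page
 * table level (all values are made up for illustration):
 *
 *	AS:00000000ab53c007 R3:00000000ac2f4007 S:00000000ad1d1000 P:000000008f2a0400
 *
 * The walk stops early if an entry is invalid or maps a large page, and
 * prints "BAD" if a table entry itself cannot be read.
 */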

static void dump_fault_info(struct pt_regs *regs)
{
	unsigned long asce;

	pr_alert("Failing address: %016lx TEID: %016lx\n",
		 regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long);
	pr_alert("Fault in ");
	switch (regs->int_parm_long & 3) {
	case 3:
		pr_cont("home space ");
		break;
	case 2:
		pr_cont("secondary space ");
		break;
	case 1:
		pr_cont("access register ");
		break;
	case 0:
		pr_cont("primary space ");
		break;
	}
	pr_cont("mode while using ");
	switch (get_fault_type(regs)) {
	case USER_FAULT:
		asce = S390_lowcore.user_asce;
		pr_cont("user ");
		break;
	case VDSO_FAULT:
		asce = S390_lowcore.vdso_asce;
		pr_cont("vdso ");
		break;
	case GMAP_FAULT:
		asce = ((struct gmap *) S390_lowcore.gmap)->asce;
		pr_cont("gmap ");
		break;
	case KERNEL_FAULT:
		asce = S390_lowcore.kernel_asce;
		pr_cont("kernel ");
		break;
	}
	pr_cont("ASCE.\n");
	dump_pagetable(asce, regs->int_parm_long & __FAIL_ADDR_MASK);
}

int show_unhandled_signals = 1;

void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault)
{
	if ((task_pid_nr(current) > 1) && !show_unhandled_signals)
		return;
	if (!unhandled_signal(current, signr))
		return;
	if (!printk_ratelimit())
		return;
	printk(KERN_ALERT "User process fault: interruption code %04x ilc:%d ",
	       regs->int_code & 0xffff, regs->int_code >> 17);
	print_vma_addr(KERN_CONT "in ", regs->psw.addr);
	printk(KERN_CONT "\n");
	if (is_mm_fault)
		dump_fault_info(regs);
	show_regs(regs);
}

/*
 * Send SIGSEGV to task.  This is an external routine
 * to keep the stack usage of do_page_fault small.
 */
static noinline void do_sigsegv(struct pt_regs *regs, int si_code)
{
	report_user_fault(regs, SIGSEGV, 1);
	force_sig_fault(SIGSEGV, si_code,
			(void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK),
			current);
}

const struct exception_table_entry *s390_search_extables(unsigned long addr)
{
	const struct exception_table_entry *fixup;

	/* search the dma exception table first, then the regular tables */
	fixup = search_extable(__start_dma_ex_table,
			       __stop_dma_ex_table - __start_dma_ex_table,
			       addr);
	if (!fixup)
		fixup = search_exception_tables(addr);
	return fixup;
}
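
/*
 * Exception-table entries come from EX_TABLE(fault_insn, fixup) pairs
 * emitted next to inline assembly; pfault_init() below is an in-file
 * example. A minimal sketch of the pattern (hypothetical code):
 *
 *	asm volatile(
 *		"0:	lg	%0,0(%1)\n"
 *		"1:\n"
 *		EX_TABLE(0b,1b)
 *		: "=d" (val) : "a" (ptr));
 *
 * If the load at label 0 faults, do_no_context() rewinds the PSW to the
 * fixup address found by s390_search_extables().
 */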

static noinline void do_no_context(struct pt_regs *regs)
{
	const struct exception_table_entry *fixup;

	/* Are we prepared to handle this kernel fault? */
	fixup = s390_search_extables(regs->psw.addr);
	if (fixup) {
		regs->psw.addr = extable_fixup(fixup);
		return;
	}

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */
	if (get_fault_type(regs) == KERNEL_FAULT)
		printk(KERN_ALERT "Unable to handle kernel pointer dereference"
		       " in virtual kernel address space\n");
	else
		printk(KERN_ALERT "Unable to handle kernel paging request"
		       " in virtual user address space\n");
	dump_fault_info(regs);
	die(regs, "Oops");
	do_exit(SIGKILL);
}

static noinline void do_low_address(struct pt_regs *regs)
{
	/*
	 * Low-address protection hit in kernel mode means
	 * NULL pointer write access in kernel mode.
	 */
	if (regs->psw.mask & PSW_MASK_PSTATE) {
		/* Low-address protection hit in user mode 'cannot happen'. */
		die(regs, "Low-address protection");
		do_exit(SIGKILL);
	}

	do_no_context(regs);
}

static noinline void do_sigbus(struct pt_regs *regs)
{
	/*
	 * Send a sigbus, regardless of whether we were in kernel
	 * or user mode.
	 */
	force_sig_fault(SIGBUS, BUS_ADRERR,
			(void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK),
			current);
}

static noinline int signal_return(struct pt_regs *regs)
{
	u16 instruction;
	int rc;

	rc = __get_user(instruction, (u16 __user *) regs->psw.addr);
	if (rc)
		return rc;
	if (instruction == 0x0a77) {
		/* "svc 119": restart as the sigreturn system call */
		set_pt_regs_flag(regs, PIF_SYSCALL);
		regs->int_code = 0x00040077;
		return 0;
	} else if (instruction == 0x0aad) {
		/* "svc 173": restart as the rt_sigreturn system call */
		set_pt_regs_flag(regs, PIF_SYSCALL);
		regs->int_code = 0x000400ad;
		return 0;
	}
	return -EACCES;
}
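
/*
 * Background for the constants above: the two-byte s390 "svc"
 * instruction is opcode 0x0a followed by the system call number, so
 * 0x0a77 is "svc 119" (sigreturn) and 0x0aad is "svc 173"
 * (rt_sigreturn). A signal handler returning through an
 * execute-protected trampoline would otherwise get a spurious SIGSEGV;
 * do_fault_error() instead lets the fault restart as the corresponding
 * system call.
 */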
358
359static noinline void do_fault_error(struct pt_regs *regs, int access,
360 vm_fault_t fault)
361{
362 int si_code;
363
364 switch (fault) {
365 case VM_FAULT_BADACCESS:
366 if (access == VM_EXEC && signal_return(regs) == 0)
367 break;
368 case VM_FAULT_BADMAP:
369
370 if (user_mode(regs)) {
371
372 si_code = (fault == VM_FAULT_BADMAP) ?
373 SEGV_MAPERR : SEGV_ACCERR;
374 do_sigsegv(regs, si_code);
375 break;
376 }
377 case VM_FAULT_BADCONTEXT:
378 case VM_FAULT_PFAULT:
379 do_no_context(regs);
380 break;
381 case VM_FAULT_SIGNAL:
382 if (!user_mode(regs))
383 do_no_context(regs);
384 break;
385 default:
386 if (fault & VM_FAULT_OOM) {
387 if (!user_mode(regs))
388 do_no_context(regs);
389 else
390 pagefault_out_of_memory();
391 } else if (fault & VM_FAULT_SIGSEGV) {
392
393 if (!user_mode(regs))
394 do_no_context(regs);
395 else
396 do_sigsegv(regs, SEGV_MAPERR);
397 } else if (fault & VM_FAULT_SIGBUS) {
398
399 if (!user_mode(regs))
400 do_no_context(regs);
401 else
402 do_sigbus(regs);
403 } else
404 BUG();
405 break;
406 }
407}
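
/*
 * Dispatch summary (derived from the switch above): a BADACCESS exec
 * fault that looks like a signal return restarts as a system call;
 * BADMAP/BADACCESS in user mode raises SIGSEGV with SEGV_MAPERR or
 * SEGV_ACCERR; BADCONTEXT, PFAULT and all kernel-mode errors go through
 * do_no_context(); user-mode OOM defers to pagefault_out_of_memory();
 * user-mode SIGBUS conditions end in do_sigbus().
 */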

/*
 * This routine handles page faults.  It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 *
 * interruption code (int_code):
 *   04       Protection           ->  Write-Protection  (suppression)
 *   10       Segment translation  ->  Not present       (nullification)
 *   11       Page translation     ->  Not present       (nullification)
 *   3b       Region third trans.  ->  Not present       (nullification)
 */
static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
{
	struct gmap *gmap;
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	enum fault_type type;
	unsigned long trans_exc_code;
	unsigned long address;
	unsigned int flags;
	vm_fault_t fault;

	tsk = current;
	/*
	 * The instruction that caused the program check has
	 * been nullified. Don't signal single step via SIGTRAP.
	 */
	clear_pt_regs_flag(regs, PIF_PER_TRAP);

	if (notify_page_fault(regs))
		return 0;

	mm = tsk->mm;
	trans_exc_code = regs->int_parm_long;

	/*
	 * Verify that the fault happened in user space, that
	 * we are not in an interrupt and that there is a
	 * user context.
	 */
	fault = VM_FAULT_BADCONTEXT;
	type = get_fault_type(regs);
	switch (type) {
	case KERNEL_FAULT:
		goto out;
	case VDSO_FAULT:
		fault = VM_FAULT_BADMAP;
		goto out;
	case USER_FAULT:
	case GMAP_FAULT:
		if (faulthandler_disabled() || !mm)
			goto out;
		break;
	}

	address = trans_exc_code & __FAIL_ADDR_MASK;
	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
	flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
	if (user_mode(regs))
		flags |= FAULT_FLAG_USER;
	if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400)
		flags |= FAULT_FLAG_WRITE;
	down_read(&mm->mmap_sem);

	gmap = NULL;
	if (IS_ENABLED(CONFIG_PGSTE) && type == GMAP_FAULT) {
		gmap = (struct gmap *) S390_lowcore.gmap;
		current->thread.gmap_addr = address;
		current->thread.gmap_write_flag = !!(flags & FAULT_FLAG_WRITE);
		current->thread.gmap_int_code = regs->int_code & 0xffff;
		address = __gmap_translate(gmap, address);
		if (address == -EFAULT) {
			fault = VM_FAULT_BADMAP;
			goto out_up;
		}
		if (gmap->pfault_enabled)
			flags |= FAULT_FLAG_RETRY_NOWAIT;
	}

retry:
	fault = VM_FAULT_BADMAP;
	vma = find_vma(mm, address);
	if (!vma)
		goto out_up;

	if (unlikely(vma->vm_start > address)) {
		if (!(vma->vm_flags & VM_GROWSDOWN))
			goto out_up;
		if (expand_stack(vma, address))
			goto out_up;
	}

	/*
	 * Ok, we have a good vm_area for this memory access, so
	 * we can handle it.
	 */
	fault = VM_FAULT_BADACCESS;
	if (unlikely(!(vma->vm_flags & access)))
		goto out_up;

	if (is_vm_hugetlb_page(vma))
		address &= HPAGE_MASK;
	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(vma, address, flags);
	/* No reason to continue if interrupted by SIGKILL. */
	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) {
		fault = VM_FAULT_SIGNAL;
		if (flags & FAULT_FLAG_RETRY_NOWAIT)
			goto out_up;
		goto out;
	}
	if (unlikely(fault & VM_FAULT_ERROR))
		goto out_up;

	/*
	 * Major/minor page fault accounting is only done on the
	 * initial attempt. If we go through a retry, it is extremely
	 * likely that the page will be found in page cache at that point.
	 */
	if (flags & FAULT_FLAG_ALLOW_RETRY) {
		if (fault & VM_FAULT_MAJOR) {
			tsk->maj_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
				      regs, address);
		} else {
			tsk->min_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
				      regs, address);
		}
		if (fault & VM_FAULT_RETRY) {
			if (IS_ENABLED(CONFIG_PGSTE) && gmap &&
			    (flags & FAULT_FLAG_RETRY_NOWAIT)) {
				/*
				 * FAULT_FLAG_RETRY_NOWAIT has been set,
				 * mmap_sem has not been released
				 */
				current->thread.gmap_pfault = 1;
				fault = VM_FAULT_PFAULT;
				goto out_up;
			}
			/*
			 * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
			 * of starvation.
			 */
			flags &= ~(FAULT_FLAG_ALLOW_RETRY |
				   FAULT_FLAG_RETRY_NOWAIT);
			flags |= FAULT_FLAG_TRIED;
			down_read(&mm->mmap_sem);
			goto retry;
		}
	}
	if (IS_ENABLED(CONFIG_PGSTE) && gmap) {
		address = __gmap_link(gmap, current->thread.gmap_addr,
				      address);
		if (address == -EFAULT) {
			fault = VM_FAULT_BADMAP;
			goto out_up;
		}
		if (address == -ENOMEM) {
			fault = VM_FAULT_OOM;
			goto out_up;
		}
	}
	fault = 0;
out_up:
	up_read(&mm->mmap_sem);
out:
	return fault;
}
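
/*
 * Note on the return value of do_exception(): 0 means the fault was
 * handled. The VM_FAULT_BAD*, VM_FAULT_SIGNAL and VM_FAULT_PFAULT
 * pseudo codes defined at the top of this file, as well as genuine
 * VM_FAULT_ERROR bits from handle_mm_fault(), are translated into
 * signals or kernel fixups by do_fault_error().
 */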

void do_protection_exception(struct pt_regs *regs)
{
	unsigned long trans_exc_code;
	int access;
	vm_fault_t fault;

	trans_exc_code = regs->int_parm_long;
	/*
	 * Protection exceptions are suppressing, decrement psw address.
	 * The exception to this rule are aborted transactions, for these
	 * the PSW already points to the correct location.
	 */
	if (!(regs->int_code & 0x200))
		regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16);

	/*
	 * Check for low-address protection.  This needs to be treated
	 * as a special case because the translation exception code
	 * field is not guaranteed to contain valid data in this case.
	 */
	if (unlikely(!(trans_exc_code & 4))) {
		do_low_address(regs);
		return;
	}
	if (unlikely(MACHINE_HAS_NX && (trans_exc_code & 0x80))) {
		regs->int_parm_long = (trans_exc_code & ~PAGE_MASK) |
				      (regs->psw.addr & PAGE_MASK);
		access = VM_EXEC;
		fault = VM_FAULT_BADACCESS;
	} else {
		access = VM_WRITE;
		fault = do_exception(regs, access);
	}
	if (unlikely(fault))
		do_fault_error(regs, access, fault);
}
NOKPROBE_SYMBOL(do_protection_exception);

void do_dat_exception(struct pt_regs *regs)
{
	int access;
	vm_fault_t fault;

	access = VM_READ | VM_EXEC | VM_WRITE;
	fault = do_exception(regs, access);
	if (unlikely(fault))
		do_fault_error(regs, access, fault);
}
NOKPROBE_SYMBOL(do_dat_exception);

#ifdef CONFIG_PFAULT
/*
 * 'pfault' pseudo page faults routines.
 */
static int pfault_disable;

static int __init nopfault(char *str)
{
	pfault_disable = 1;
	return 1;
}

__setup("nopfault", nopfault);

struct pfault_refbk {
	u16 refdiagc;	/* diagnose code: 0x258 */
	u16 reffcode;	/* function code: 0 = token set, 1 = cancel */
	u16 refdwlen;	/* doubleword length of the parameter block */
	u16 refversn;	/* interface version */
	u64 refgaddr;	/* guest address of the pfault token */
	u64 refselmk;	/* select mask */
	u64 refcmpmk;	/* compare mask */
	u64 reserved;
} __attribute__ ((packed, aligned(8)));

int pfault_init(void)
{
	struct pfault_refbk refbk = {
		.refdiagc = 0x258,
		.reffcode = 0,
		.refdwlen = 5,
		.refversn = 2,
		.refgaddr = __LC_LPP,
		.refselmk = 1ULL << 48,
		.refcmpmk = 1ULL << 48,
		.reserved = __PF_RES_FIELD };
	int rc;

	if (pfault_disable)
		return -1;
	diag_stat_inc(DIAG_STAT_X258);
	asm volatile(
		"	diag	%1,%0,0x258\n"
		"0:	j	2f\n"
		"1:	la	%0,8\n"
		"2:\n"
		EX_TABLE(0b,1b)
		: "=d" (rc) : "a" (&refbk), "m" (refbk) : "cc");
	return rc;
}

void pfault_fini(void)
{
	struct pfault_refbk refbk = {
		.refdiagc = 0x258,
		.reffcode = 1,
		.refdwlen = 5,
		.refversn = 2,
	};

	if (pfault_disable)
		return;
	diag_stat_inc(DIAG_STAT_X258);
	asm volatile(
		"	diag	%0,0,0x258\n"
		"0:	nopr	%%r7\n"
		EX_TABLE(0b,0b)
		: : "a" (&refbk), "m" (refbk) : "cc");
}
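
/*
 * Pairing note: pfault_init() arms the mechanism via diagnose 0x258
 * function code 0 (token set) and pfault_fini() cancels it via function
 * code 1. Besides pfault_irq_init() below, the CPU startup/shutdown
 * code is expected to call these so that each online CPU has pfault
 * armed (an assumption about the callers outside this file).
 */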

static DEFINE_SPINLOCK(pfault_lock);
static LIST_HEAD(pfault_list);

#define PF_COMPLETE	0x0080

/*
 * The mechanism of our pfault code: if Linux is running as guest, runs
 * a user space process and the user space process accesses a page that
 * the host has paged out we get a pfault interrupt.
 *
 * This allows us, within the guest, to schedule a different process.
 * Without this mechanism the host would have to suspend the whole
 * virtual cpu until the page has been paged in.
 *
 * So when we get such an interrupt then we set the state of the current
 * task to uninterruptible and also set the need_resched flag. Both
 * happens within interrupt context(!). If we later on want to return to
 * user space we recognize the need_resched flag and then call
 * schedule().  It's not very obvious how this works...
 *
 * Of course we have a lot of additional fun with the completion
 * interrupt (-> host signals that a page of a process has been paged in
 * and the process can continue to run). This interrupt can arrive on
 * any cpu and, since we have virtual cpus, actually appear before the
 * interrupt that signals that a page is missing.
 */
static void pfault_interrupt(struct ext_code ext_code,
			     unsigned int param32, unsigned long param64)
{
	struct task_struct *tsk;
	__u16 subcode;
	pid_t pid;

	/*
	 * Get the external interruption subcode & pfault initial/completion
	 * signal bit. VM stores this in the 'cpu address' field associated
	 * with the external interrupt.
	 */
	subcode = ext_code.subcode;
	if ((subcode & 0xff00) != __SUBCODE_MASK)
		return;
	inc_irq_stat(IRQEXT_PFL);
	/* Get the token (= pid of the affected task). */
	pid = param64 & LPP_PID_MASK;
	rcu_read_lock();
	tsk = find_task_by_pid_ns(pid, &init_pid_ns);
	if (tsk)
		get_task_struct(tsk);
	rcu_read_unlock();
	if (!tsk)
		return;
	spin_lock(&pfault_lock);
	if (subcode & PF_COMPLETE) {
		/* signal bit is set -> a page has been swapped in by VM */
		if (tsk->thread.pfault_wait == 1) {
			/*
			 * Initial interrupt was faster than the completion
			 * interrupt. pfault_wait is valid. Set pfault_wait
			 * back to zero and wake up the process. This can
			 * safely be done because the task is still sleeping
			 * and can't produce new pfaults.
			 */
			tsk->thread.pfault_wait = 0;
			list_del(&tsk->thread.list);
			wake_up_process(tsk);
			put_task_struct(tsk);
		} else {
			/*
			 * Completion interrupt was faster than initial
			 * interrupt. Set pfault_wait to -1 so the initial
			 * interrupt doesn't put the task to sleep.
			 * If the task is not running, ignore the completion
			 * interrupt since it must be a leftover of a PFAULT
			 * CANCEL operation which didn't remove all pending
			 * completion interrupts.
			 */
			if (tsk->state == TASK_RUNNING)
				tsk->thread.pfault_wait = -1;
		}
	} else {
		/* signal bit not set -> a real page is missing. */
		if (WARN_ON_ONCE(tsk != current))
			goto out;
		if (tsk->thread.pfault_wait == 1) {
			/* Already on the list with a reference: put to sleep */
			goto block;
		} else if (tsk->thread.pfault_wait == -1) {
			/*
			 * Completion interrupt was faster than the initial
			 * interrupt (pfault_wait == -1). Set pfault_wait
			 * back to zero and exit.
			 */
			tsk->thread.pfault_wait = 0;
		} else {
			/*
			 * Initial interrupt arrived before completion
			 * interrupt. Let the task sleep.
			 * An extra task reference is needed since a different
			 * cpu may set the task state to TASK_RUNNING again
			 * before the scheduler is reached.
			 */
			get_task_struct(tsk);
			tsk->thread.pfault_wait = 1;
			list_add(&tsk->thread.list, &pfault_list);
block:
			/*
			 * Since this must be a userspace fault, there
			 * is no kernel task state to trample. Rely on the
			 * return to userspace schedule() to block.
			 */
			__set_current_state(TASK_UNINTERRUPTIBLE);
			set_tsk_need_resched(tsk);
			set_preempt_need_resched();
		}
	}
out:
	spin_unlock(&pfault_lock);
	put_task_struct(tsk);
}

static int pfault_cpu_dead(unsigned int cpu)
{
	struct thread_struct *thread, *next;
	struct task_struct *tsk;

	spin_lock_irq(&pfault_lock);
	list_for_each_entry_safe(thread, next, &pfault_list, list) {
		thread->pfault_wait = 0;
		list_del(&thread->list);
		tsk = container_of(thread, struct task_struct, thread);
		wake_up_process(tsk);
		put_task_struct(tsk);
	}
	spin_unlock_irq(&pfault_lock);
	return 0;
}

static int __init pfault_irq_init(void)
{
	int rc;

	rc = register_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt);
	if (rc)
		goto out_extint;
	rc = pfault_init() == 0 ? 0 : -EOPNOTSUPP;
	if (rc)
		goto out_pfault;
	irq_subclass_register(IRQ_SUBCLASS_SERVICE_SIGNAL);
	cpuhp_setup_state_nocalls(CPUHP_S390_PFAULT_DEAD, "s390/pfault:dead",
				  NULL, pfault_cpu_dead);
	return 0;

out_pfault:
	unregister_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt);
out_extint:
	pfault_disable = 1;
	return rc;
}
early_initcall(pfault_irq_init);

#endif /* CONFIG_PFAULT */