// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  PowerPC version
 *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 *  Derived from "arch/i386/mm/fault.c"
 *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *
 *  Modified by Cort Dougan and Paul Mackerras.
 *
 *  Modified for PPC64 by Dave Engebretsen (engebret@us.ibm.com)
 */
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/pagemap.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/highmem.h>
#include <linux/extable.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/perf_event.h>
#include <linux/ratelimit.h>
#include <linux/context_tracking.h>
#include <linux/hugetlb.h>
#include <linux/uaccess.h>

#include <asm/firmware.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/siginfo.h>
#include <asm/debug.h>
#include <asm/kup.h>

/*
 * Check whether the instruction inst is a store using
 * an update addressing form which will update r1.
 */
static bool store_updates_sp(unsigned int inst)
{
	/* check for 1 in the rA field */
	if (((inst >> 16) & 0x1f) != 1)
		return false;
	/* check major opcode */
	switch (inst >> 26) {
	case OP_STWU:
	case OP_STBU:
	case OP_STHU:
	case OP_STFSU:
	case OP_STFDU:
		return true;
	case OP_STD:	/* std or stdu */
		return (inst & 3) == 1;
	case OP_31:
		/* check minor opcode */
		switch ((inst >> 1) & 0x3ff) {
		case OP_31_XOP_STDUX:
		case OP_31_XOP_STWUX:
		case OP_31_XOP_STBUX:
		case OP_31_XOP_STHUX:
		case OP_31_XOP_STFSUX:
		case OP_31_XOP_STFDUX:
			return true;
		}
	}
	return false;
}

/*
 * do_page_fault error handling helpers.
 */
static int
__bad_area_nosemaphore(struct pt_regs *regs, unsigned long address, int si_code)
{
	/*
	 * If we are in kernel mode, bail out with a SEGV, this will
	 * be caught by the assembly which will restore the non-volatile
	 * registers before calling bad_page_fault()
	 */
	if (!user_mode(regs))
		return SIGSEGV;

	_exception(SIGSEGV, regs, si_code, address);

	return 0;
}

static noinline int bad_area_nosemaphore(struct pt_regs *regs, unsigned long address)
{
	return __bad_area_nosemaphore(regs, address, SEGV_MAPERR);
}

static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code)
{
	struct mm_struct *mm = current->mm;

	/*
	 * Something tried to access memory that isn't in our memory map..
	 * Fix it, but check if it's kernel or user first..
	 */
	up_read(&mm->mmap_sem);

	return __bad_area_nosemaphore(regs, address, si_code);
}

static noinline int bad_area(struct pt_regs *regs, unsigned long address)
{
	return __bad_area(regs, address, SEGV_MAPERR);
}
static int bad_key_fault_exception(struct pt_regs *regs, unsigned long address,
				   int pkey)
{
	/*
	 * If we are in kernel mode, bail out with a SEGV, this will
	 * be caught by the assembly which will restore the non-volatile
	 * registers before calling bad_page_fault()
	 */
	if (!user_mode(regs))
		return SIGSEGV;

	_exception_pkey(regs, address, pkey);

	return 0;
}

static noinline int bad_access(struct pt_regs *regs, unsigned long address)
{
	return __bad_area(regs, address, SEGV_ACCERR);
}
static int do_sigbus(struct pt_regs *regs, unsigned long address,
		     vm_fault_t fault)
{
	if (!user_mode(regs))
		return SIGBUS;

	current->thread.trap_nr = BUS_ADRERR;
#ifdef CONFIG_MEMORY_FAILURE
	if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) {
		unsigned int lsb = 0; /* shift information */

		pr_err("MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n",
			current->comm, current->pid, address);

		if (fault & VM_FAULT_HWPOISON_LARGE)
			lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
		if (fault & VM_FAULT_HWPOISON)
			lsb = PAGE_SHIFT;

		force_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb);
		return 0;
	}

#endif
	force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);
	return 0;
}

static int mm_fault_error(struct pt_regs *regs, unsigned long addr,
			  vm_fault_t fault)
{
	/*
	 * Kernel page fault interrupted by SIGKILL. We have no reason to
	 * continue processing.
	 */
	if (fatal_signal_pending(current) && !user_mode(regs))
		return SIGKILL;

	/* Out of memory */
	if (fault & VM_FAULT_OOM) {
		/*
		 * We ran out of memory, or some other thing happened to us that
		 * made us unable to handle the page fault gracefully.
		 */
		if (!user_mode(regs))
			return SIGSEGV;
		pagefault_out_of_memory();
	} else {
		if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
			     VM_FAULT_HWPOISON_LARGE))
			return do_sigbus(regs, addr, fault);
		else if (fault & VM_FAULT_SIGSEGV)
			return bad_area_nosemaphore(regs, addr);
		else
			BUG();
	}
	return 0;
}

/* Is this a bad kernel fault ? */
static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
			     unsigned long address, bool is_write)
{
	int is_exec = TRAP(regs) == 0x400;

	/* NX faults set DSISR_PROTFAULT on the 8xx, DSISR_NOEXEC_OR_G on others */
	if (is_exec && (error_code & (DSISR_NOEXEC_OR_G | DSISR_KEYFAULT |
				      DSISR_PROTFAULT))) {
		pr_crit_ratelimited("kernel tried to execute %s page (%lx) - exploit attempt? (uid: %d)\n",
				    address >= TASK_SIZE ? "exec-protected" : "user",
				    address,
				    from_kuid(&init_user_ns, current_uid()));

		// Kernel exec fault is always bad
		return true;
	}

	if (!is_exec && address < TASK_SIZE && (error_code & DSISR_PROTFAULT) &&
	    !search_exception_tables(regs->nip)) {
		pr_crit_ratelimited("Kernel attempted to access user page (%lx) - exploit attempt? (uid: %d)\n",
				    address,
				    from_kuid(&init_user_ns, current_uid()));
	}

	// Kernel fault on kernel address is bad
	if (address >= TASK_SIZE)
		return true;

	// Fault on user outside of certain regions is bad
	if (!search_exception_tables(regs->nip))
		return true;

	// Read/write fault in a valid region (the exception table search passed
	// above), but blocked by KUAP is bad, it can never succeed.
	if (bad_kuap_fault(regs, is_write))
		return true;

	// What's left? Kernel fault on user in well defined regions (extable
	// matched), and allowed by KUAP in the faulting context.
	return false;
}

static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address,
				struct vm_area_struct *vma, unsigned int flags,
				bool *must_retry)
{
	/*
	 * N.B. The POWER/Open ABI allows programs to access up to
	 * 288 bytes below the stack pointer.
	 * The kernel signal delivery code writes up to about 1.5kB
	 * below the stack pointer (r1) before decrementing it.
	 * The exec code can write slightly over 640kB to the stack
	 * before setting the user r1.  Thus we allow the stack to
	 * expand to 1MB without further checks.
	 */
	if (address + 0x100000 < vma->vm_end) {
		unsigned int __user *nip = (unsigned int __user *)regs->nip;
		/* get user regs even if this fault is in kernel mode */
		struct pt_regs *uregs = current->thread.regs;
		if (uregs == NULL)
			return true;

		/*
		 * A user-mode access to an address a long way below
		 * the stack pointer is only valid if the instruction
		 * is one which would update the stack pointer to the
		 * address accessed if the instruction completed,
		 * i.e. either stwu rs,n(r1) or stwux rs,r1,rb
		 * (or the byte, halfword, float or double forms).
		 *
		 * If we don't check this then any write to the area
		 * between the last mapped region and the stack will
		 * expand the stack rather than segfaulting.
		 */
		if (address + 2048 >= uregs->gpr[1])
			return false;

		if ((flags & FAULT_FLAG_WRITE) && (flags & FAULT_FLAG_USER) &&
		    access_ok(nip, sizeof(*nip))) {
			unsigned int inst;
			int res;

			pagefault_disable();
			res = __get_user_inatomic(inst, nip);
			pagefault_enable();
			if (!res)
				return !store_updates_sp(inst);
			*must_retry = true;
		}
		return true;
	}
	return false;
}

static bool access_error(bool is_write, bool is_exec,
			 struct vm_area_struct *vma)
{
	/*
	 * Allow execution from readable areas if the MMU does not
	 * provide separate controls over reading and executing.
	 *
	 * Note: That code used to not be enabled for 4xx/BookE.
	 * It is now as I/D cache coherency for these is done at
	 * set_pte_at() time and I see no reason why the test
	 * below wouldn't be valid on those processors. This -may-
	 * break programs compiled with a really old ABI though.
	 */
	if (is_exec) {
		return !(vma->vm_flags & VM_EXEC) &&
			(cpu_has_feature(CPU_FTR_NOEXECUTE) ||
			 !(vma->vm_flags & (VM_READ | VM_WRITE)));
	}

	if (is_write) {
		if (unlikely(!(vma->vm_flags & VM_WRITE)))
			return true;
		return false;
	}

	if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
		return true;
	/*
	 * We deliberately do not do the vma pkey access check here.  A key
	 * fault is instead reported by handle_mm_fault() and handled via
	 * bad_key_fault_exception() in __do_page_fault().
	 */
	return false;
}

#ifdef CONFIG_PPC_SMLPAR
static inline void cmo_account_page_fault(void)
{
	if (firmware_has_feature(FW_FEATURE_CMO)) {
		u32 page_ins;

		preempt_disable();
		page_ins = be32_to_cpu(get_lppaca()->page_ins);
		page_ins += 1 << PAGE_FACTOR;
		get_lppaca()->page_ins = cpu_to_be32(page_ins);
		preempt_enable();
	}
}
#else
static inline void cmo_account_page_fault(void) { }
#endif

#ifdef CONFIG_PPC_BOOK3S
static void sanity_check_fault(bool is_write, bool is_user,
			       unsigned long error_code, unsigned long address)
{
	/*
	 * Userspace trying to access kernel memory, is bad.
	 */
	if (is_user && address >= TASK_SIZE) {
		pr_crit_ratelimited("%s[%d]: User access of kernel address (%lx) - exploit attempt? (uid: %d)\n",
				    current->comm, current->pid, address,
				    from_kuid(&init_user_ns, current_uid()));
		return;
	}

	/*
	 * On the hash MMU, a read access should never see DSISR_PROTFAULT:
	 * reducing access rights removes the hash page table entry, so a
	 * later access faults with DSISR_NOHPTE instead.
	 *
	 * Writes are excluded because relaxing access (e.g. via
	 * ptep_set_access_flags()) does not flush the hash page table
	 * entry, so a write can legitimately see DSISR_PROTFAULT.  Radix
	 * reports protection faults directly, so it is excluded too.
	 *
	 * Warn if we nevertheless see a protection fault for a read on
	 * hash, as that indicates a bug in the MMU handling.
	 */
	if (radix_enabled() || is_write)
		return;

	WARN_ON_ONCE(error_code & DSISR_PROTFAULT);
}
#else
static void sanity_check_fault(bool is_write, bool is_user,
			       unsigned long error_code, unsigned long address) { }
#endif

/*
 * Define the correct "is_write" bit in error_code based
 * on the processor family
 */
#if (defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
#define page_fault_is_write(__err)	((__err) & ESR_DST)
#define page_fault_is_bad(__err)	(0)
#else
#define page_fault_is_write(__err)	((__err) & DSISR_ISSTORE)
#if defined(CONFIG_PPC_8xx)
#define page_fault_is_bad(__err)	((__err) & DSISR_NOEXEC_OR_G)
#elif defined(CONFIG_PPC64)
#define page_fault_is_bad(__err)	((__err) & DSISR_BAD_FAULT_64S)
#else
#define page_fault_is_bad(__err)	((__err) & DSISR_BAD_FAULT_32S)
#endif
#endif

/*
 * For 600- and 800-family processors, the error_code parameter is DSISR
 * for a data fault, SRR1 for an instruction fault. For 400-family processors
 * the error_code parameter is ESR for a data fault, 0 for an instruction
 * fault.
 * For 64-bit processors, the error_code parameter is
 *  - DSISR for a non-SLB data access fault,
 *  - SRR1 & 0x08000000 for a non-SLB instruction access fault
 *  - 0 any SLB fault.
 *
 * The return value is 0 if the fault was handled, or the signal
 * number if this is a kernel fault that can't be handled here.
 */
static int __do_page_fault(struct pt_regs *regs, unsigned long address,
			   unsigned long error_code)
{
	struct vm_area_struct * vma;
	struct mm_struct *mm = current->mm;
	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
	int is_exec = TRAP(regs) == 0x400;
	int is_user = user_mode(regs);
	int is_write = page_fault_is_write(error_code);
	vm_fault_t fault, major = 0;
	bool must_retry = false;
	bool kprobe_fault = kprobe_page_fault(regs, 11);

	if (unlikely(debugger_fault_handler(regs) || kprobe_fault))
		return 0;

	if (unlikely(page_fault_is_bad(error_code))) {
		if (is_user) {
			_exception(SIGBUS, regs, BUS_OBJERR, address);
			return 0;
		}
		return SIGBUS;
	}

	/* Additional sanity check(s) */
	sanity_check_fault(is_write, is_user, error_code, address);

	/*
	 * The kernel should never take an execute fault nor should it
	 * take a page fault to a kernel address or a page fault to a user
	 * address outside of dedicated places
	 */
	if (unlikely(!is_user && bad_kernel_fault(regs, error_code, address, is_write)))
		return SIGSEGV;

	/*
	 * If we're in an interrupt, have no user context or are running
	 * in a region with pagefaults disabled then we must not take the fault
	 */
	if (unlikely(faulthandler_disabled() || !mm)) {
		if (is_user)
			printk_ratelimited(KERN_ERR "Page fault in user mode"
					   " with faulthandler_disabled()=%d"
					   " mm=%p\n",
					   faulthandler_disabled(), mm);
		return bad_area_nosemaphore(regs, address);
	}

	/* We restore the interrupt state now */
	if (!arch_irq_disabled_regs(regs))
		local_irq_enable();

	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);

	if (error_code & DSISR_KEYFAULT)
		return bad_key_fault_exception(regs, address,
					       get_mm_addr_key(mm, address));

	/*
	 * We want to do this outside mmap_sem, because reading code around nip
	 * can result in fault, which will cause a deadlock when called with
	 * mmap_sem held
	 */
	if (is_user)
		flags |= FAULT_FLAG_USER;
	if (is_write)
		flags |= FAULT_FLAG_WRITE;
	if (is_exec)
		flags |= FAULT_FLAG_INSTRUCTION;

	/* When running in the kernel we expect faults to occur only to
	 * addresses in user space.  All other faults represent errors in the
	 * kernel and should generate an OOPS.  Unfortunately, in the case of an
	 * erroneous fault occurring in a code path which already holds mmap_sem
	 * we will deadlock attempting to validate the fault against the
	 * address space.  Luckily the kernel only validly references user
	 * space from well defined areas of code, which are listed in the
	 * exceptions table.
	 *
	 * As the vast majority of faults will be valid we will only perform
	 * the source reference check when there is a possibility of a deadlock.
	 * Attempt to lock the address space, if we cannot we then validate the
	 * source.  If this is invalid we can skip the address space check,
	 * thus avoiding the deadlock.
	 */
	if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
		if (!is_user && !search_exception_tables(regs->nip))
			return bad_area_nosemaphore(regs, address);

retry:
		down_read(&mm->mmap_sem);
	} else {
		/*
		 * The above down_read_trylock() might have succeeded in
		 * which case we'll have missed the might_sleep() from
		 * down_read():
		 */
		might_sleep();
	}

	vma = find_vma(mm, address);
	if (unlikely(!vma))
		return bad_area(regs, address);
	if (likely(vma->vm_start <= address))
		goto good_area;
	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
		return bad_area(regs, address);

	/* The stack is being expanded, check if it's valid */
	if (unlikely(bad_stack_expansion(regs, address, vma, flags,
					 &must_retry))) {
		if (!must_retry)
			return bad_area(regs, address);

		up_read(&mm->mmap_sem);
		if (fault_in_pages_readable((const char __user *)regs->nip,
					    sizeof(unsigned int)))
			return bad_area_nosemaphore(regs, address);
		goto retry;
	}

	/* Try to expand the stack down to the faulting address */
	if (unlikely(expand_stack(vma, address)))
		return bad_area(regs, address);

good_area:
	if (unlikely(access_error(is_write, is_exec, vma)))
		return bad_access(regs, address);

	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(vma, address, flags);

#ifdef CONFIG_PPC_MEM_KEYS
	/*
	 * We skipped checking for access error due to key earlier.
	 * Check that using the handle_mm_fault() error return.
	 */
	if (unlikely(fault & VM_FAULT_SIGSEGV) &&
		!arch_vma_access_permitted(vma, is_write, is_exec, 0)) {

		int pkey = vma_pkey(vma);

		up_read(&mm->mmap_sem);
		return bad_key_fault_exception(regs, address, pkey);
	}
#endif

	major |= fault & VM_FAULT_MAJOR;

	/*
	 * Handle the retry right now, the mmap_sem has been released in that
	 * case.
	 */
	if (unlikely(fault & VM_FAULT_RETRY)) {
		/* We retry only once */
		if (flags & FAULT_FLAG_ALLOW_RETRY) {
			/*
			 * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
			 * of starvation.
			 */
			flags &= ~FAULT_FLAG_ALLOW_RETRY;
			flags |= FAULT_FLAG_TRIED;
			if (!fatal_signal_pending(current))
				goto retry;
		}

		/*
		 * User mode? Just return to handle the fatal exception otherwise
		 * return to bad_page_fault
		 */
		return is_user ? 0 : SIGBUS;
	}

	up_read(&current->mm->mmap_sem);

	if (unlikely(fault & VM_FAULT_ERROR))
		return mm_fault_error(regs, address, fault);

	/*
	 * Major/minor page fault accounting.
	 */
	if (major) {
		current->maj_flt++;
		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
		cmo_account_page_fault();
	} else {
		current->min_flt++;
		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
	}
	return 0;
}
NOKPROBE_SYMBOL(__do_page_fault);

int do_page_fault(struct pt_regs *regs, unsigned long address,
		  unsigned long error_code)
{
	enum ctx_state prev_state = exception_enter();
	int rc = __do_page_fault(regs, address, error_code);
	exception_exit(prev_state);
	return rc;
}
NOKPROBE_SYMBOL(do_page_fault);

/*
 * bad_page_fault is called when we have a bad kernel page fault on
 * a direct virtual or physical address.  If an exception table entry
 * exists for the faulting instruction we fix up the return address;
 * otherwise we print an oops and die.
 */
void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
{
	const struct exception_table_entry *entry;

	/* Are we prepared to handle this fault?  */
	if ((entry = search_exception_tables(regs->nip)) != NULL) {
		regs->nip = extable_fixup(entry);
		return;
	}

	/* kernel has accessed a bad area */

	switch (TRAP(regs)) {
	case 0x300:
	case 0x380:
	case 0xe00:
		pr_alert("BUG: %s at 0x%08lx\n",
			 regs->dar < PAGE_SIZE ? "Kernel NULL pointer dereference" :
			 "Unable to handle kernel data access", regs->dar);
		break;
	case 0x400:
	case 0x480:
		pr_alert("BUG: Unable to handle kernel instruction fetch%s",
			 regs->nip < PAGE_SIZE ? " (NULL pointer?)\n" : "\n");
		break;
	case 0x600:
		pr_alert("BUG: Unable to handle kernel unaligned access at 0x%08lx\n",
			 regs->dar);
		break;
	default:
		pr_alert("BUG: Unable to handle unknown paging fault at 0x%08lx\n",
			 regs->dar);
		break;
	}
	printk(KERN_ALERT "Faulting instruction address: 0x%08lx\n",
		regs->nip);

	if (task_stack_end_corrupted(current))
		printk(KERN_ALERT "Thread overran stack, or stack corrupted\n");

	die("Kernel access of bad area", regs, sig);
}