// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  PowerPC version
 *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 *  Derived from "arch/i386/mm/fault.c"
 *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *
 *  Modified by Cort Dougan and Paul Mackerras.
 *
 *  Modified for PPC64 by Dave Engebretsen (engebret@ibm.com)
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/pagemap.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/highmem.h>
#include <linux/extable.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/perf_event.h>
#include <linux/ratelimit.h>
#include <linux/context_tracking.h>
#include <linux/hugetlb.h>
#include <linux/uaccess.h>

#include <asm/firmware.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/siginfo.h>
#include <asm/debug.h>
#include <asm/kup.h>

/*
 * Check whether the instruction inst is a store using
 * an update addressing form which will update r1.
 */
static bool store_updates_sp(unsigned int inst)
{
	/* check for 1 in the rA field */
	if (((inst >> 16) & 0x1f) != 1)
		return false;

	switch (inst >> 26) {
	case OP_STWU:
	case OP_STBU:
	case OP_STHU:
	case OP_STFSU:
	case OP_STFDU:
		return true;
	case OP_STD:	/* std or stdu */
		return (inst & 3) == 1;
	case OP_31:
		/* check minor opcode */
		switch ((inst >> 1) & 0x3ff) {
		case OP_31_XOP_STDUX:
		case OP_31_XOP_STWUX:
		case OP_31_XOP_STBUX:
		case OP_31_XOP_STHUX:
		case OP_31_XOP_STFSUX:
		case OP_31_XOP_STFDUX:
			return true;
		}
	}
	return false;
}

/*
 * do_page_fault error handling helpers
 */

static int
__bad_area_nosemaphore(struct pt_regs *regs, unsigned long address, int si_code)
{
	/*
	 * If we are in kernel mode, bail out with a SEGV, this will
	 * be caught by the assembly which will restore the non-volatile
	 * registers before calling bad_page_fault()
	 */
	if (!user_mode(regs))
		return SIGSEGV;

	_exception(SIGSEGV, regs, si_code, address);

	return 0;
}

static noinline int bad_area_nosemaphore(struct pt_regs *regs, unsigned long address)
{
	return __bad_area_nosemaphore(regs, address, SEGV_MAPERR);
}

static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code)
{
	struct mm_struct *mm = current->mm;

	/*
	 * Something tried to access memory that isn't in our memory map.
	 * Drop the mmap_sem before reporting it.
	 */
	up_read(&mm->mmap_sem);

	return __bad_area_nosemaphore(regs, address, si_code);
}

static noinline int bad_area(struct pt_regs *regs, unsigned long address)
{
	return __bad_area(regs, address, SEGV_MAPERR);
}

static int bad_key_fault_exception(struct pt_regs *regs, unsigned long address,
				   int pkey)
{
	/*
	 * If we are in kernel mode, bail out with a SEGV, this will
	 * be caught by the assembly which will restore the non-volatile
	 * registers before calling bad_page_fault()
	 */
	if (!user_mode(regs))
		return SIGSEGV;

	_exception_pkey(regs, address, pkey);

	return 0;
}

static noinline int bad_access(struct pt_regs *regs, unsigned long address)
{
	return __bad_area(regs, address, SEGV_ACCERR);
}

static int do_sigbus(struct pt_regs *regs, unsigned long address,
		     vm_fault_t fault)
{
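	/* Kernel-mode faults get no signal; the caller oopses via bad_page_fault(). */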
	if (!user_mode(regs))
		return SIGBUS;

	current->thread.trap_nr = BUS_ADRERR;
#ifdef CONFIG_MEMORY_FAILURE
	if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) {
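		/*
		 * Hardware poison: kill the task with BUS_MCEERR_AR and
		 * report the log2 size of the poisoned region in si_addr_lsb.
		 */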
		unsigned int lsb = 0; /* shift of the poisoned address range */

		pr_err("MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n",
			current->comm, current->pid, address);

		if (fault & VM_FAULT_HWPOISON_LARGE)
			lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
		if (fault & VM_FAULT_HWPOISON)
			lsb = PAGE_SHIFT;

		force_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb);
		return 0;
	}

#endif
	force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);
	return 0;
}

static int mm_fault_error(struct pt_regs *regs, unsigned long addr,
			  vm_fault_t fault)
{
	/*
	 * Kernel page fault interrupted by SIGKILL. We have no reason to
	 * continue processing.
	 */
	if (fatal_signal_pending(current) && !user_mode(regs))
		return SIGKILL;

	/* Out of memory */
	if (fault & VM_FAULT_OOM) {
		/*
		 * We ran out of memory, or some other thing happened to us that
		 * made us unable to handle the page fault gracefully.
		 */
		if (!user_mode(regs))
			return SIGSEGV;
		pagefault_out_of_memory();
	} else {
		if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
			     VM_FAULT_HWPOISON_LARGE))
			return do_sigbus(regs, addr, fault);
		else if (fault & VM_FAULT_SIGSEGV)
			return bad_area_nosemaphore(regs, addr);
		else
			BUG();
	}
	return 0;
}

/* Is this a bad kernel fault ? */
static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
			     unsigned long address, bool is_write)
{
	int is_exec = TRAP(regs) == 0x400;

	/* NX faults set DSISR_PROTFAULT on the 8xx, DSISR_NOEXEC_OR_G on others */
	if (is_exec && (error_code & (DSISR_NOEXEC_OR_G | DSISR_KEYFAULT |
				      DSISR_PROTFAULT))) {
		pr_crit_ratelimited("kernel tried to execute %s page (%lx) - exploit attempt? (uid: %d)\n",
				    address >= TASK_SIZE ? "exec-protected" : "user",
				    address,
				    from_kuid(&init_user_ns, current_uid()));

		// Kernel exec fault is always bad
		return true;
	}

	if (!is_exec && address < TASK_SIZE && (error_code & DSISR_PROTFAULT) &&
	    !search_exception_tables(regs->nip)) {
		pr_crit_ratelimited("Kernel attempted to access user page (%lx) - exploit attempt? (uid: %d)\n",
				    address,
				    from_kuid(&init_user_ns, current_uid()));
	}

	// Kernel fault on kernel address is always bad
	if (address >= TASK_SIZE)
		return true;

	// Fault on user outside of certain regions (eg. copy_tofrom_user()) is bad
	if (!search_exception_tables(regs->nip))
		return true;

	// Read/write fault in a valid region (the exception table search passed
	// above), but blocked by KUAP is bad, it can never succeed.
	if (bad_kuap_fault(regs, address, is_write))
		return true;

	// What's left? Kernel fault on user in well defined regions (extable
	// matched), and allowed by KUAP in the faulting context.
	return false;
}

static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address,
				struct vm_area_struct *vma, unsigned int flags,
				bool *must_retry)
{
	/*
	 * N.B. The POWER/Open ABI allows programs to access up to
	 * 288 bytes below the stack pointer.
	 * The kernel signal delivery code writes up to about 1.5kB
	 * below the stack pointer (r1) before decrementing it.
	 * The exec code can write slightly over 640kB to the stack
	 * before setting the user r1.  Thus we allow the stack to
	 * expand to 1MB without further checks.
	 */
	if (address + 0x100000 < vma->vm_end) {
		unsigned int __user *nip = (unsigned int __user *)regs->nip;
		/* get user regs even in kernel case (pagefault_disable) */
		struct pt_regs *uregs = current->thread.regs;
		if (uregs == NULL)
			return true;

		/*
		 * A user-mode access to an address a long way below
		 * the stack pointer is only valid if the instruction
		 * is one which would update the stack pointer to the
		 * address accessed if the instruction completed,
		 * i.e. either stwu rs,n(r1) or stwux rs,r1,rb
		 * (or the byte, halfword, float or double forms).
		 *
		 * If we don't check this then any write to the area
		 * between the last mapped region and the stack will
		 * expand the stack rather than segfaulting.
		 */
		if (address + 2048 >= uregs->gpr[1])
			return false;

		if ((flags & FAULT_FLAG_WRITE) && (flags & FAULT_FLAG_USER) &&
		    access_ok(nip, sizeof(*nip))) {
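			/*
			 * Peek at the faulting instruction. If we can't read
			 * it without faulting, ask the caller to fault it in
			 * and retry the whole thing.
			 */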
			unsigned int inst;

			if (!probe_user_read(&inst, nip, sizeof(inst)))
				return !store_updates_sp(inst);
			*must_retry = true;
		}
		return true;
	}
	return false;
}

static bool access_error(bool is_write, bool is_exec,
			 struct vm_area_struct *vma)
{
	/*
	 * Allow execution from readable areas if the MMU does not
	 * provide separate controls over reading and executing.
	 *
	 * Note: That code used to not be enabled for 4xx/BookE.
	 * It is now as I/D cache coherency for these is done at
	 * set_pte_at() time and I see no reason why the test
	 * below wouldn't be valid on those processors. This -may-
	 * break programs compiled with a really old ABI though.
	 */
	if (is_exec) {
		return !(vma->vm_flags & VM_EXEC) &&
			(cpu_has_feature(CPU_FTR_NOEXECUTE) ||
			 !(vma->vm_flags & (VM_READ | VM_WRITE)));
	}

	if (is_write) {
		if (unlikely(!(vma->vm_flags & VM_WRITE)))
			return true;
		return false;
	}

	if (unlikely(!vma_is_accessible(vma)))
		return true;
	/*
	 * We should ideally do the vma pkey access check here. But in the
	 * fault path, handle_mm_fault() also does the same check. To avoid
	 * these multiple checks, we skip it here and handle access error due
	 * to pkeys later.
	 */
	return false;
}

#ifdef CONFIG_PPC_SMLPAR
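/*
 * Account the fault as a page-in for Cooperative Memory Overcommit: the
 * hypervisor tracks paging activity of shared-memory partitions via the
 * lppaca page_ins counter.
 */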
static inline void cmo_account_page_fault(void)
{
	if (firmware_has_feature(FW_FEATURE_CMO)) {
		u32 page_ins;

		preempt_disable();
		page_ins = be32_to_cpu(get_lppaca()->page_ins);
		page_ins += 1 << PAGE_FACTOR;
		get_lppaca()->page_ins = cpu_to_be32(page_ins);
		preempt_enable();
	}
}
#else
static inline void cmo_account_page_fault(void) { }
#endif

#ifdef CONFIG_PPC_BOOK3S
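/*
 * Sanity checks for Book3S: flag accesses that look like exploit attempts
 * and warn about PROTFAULTs that hash translation should never generate.
 */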
static void sanity_check_fault(bool is_write, bool is_user,
			       unsigned long error_code, unsigned long address)
{
	/*
	 * Userspace trying to access kernel address, we get PROTFAULT for that.
	 */
	if (is_user && address >= TASK_SIZE) {
		if ((long)address == -1)
			return;

		pr_crit_ratelimited("%s[%d]: User access of kernel address (%lx) - exploit attempt? (uid: %d)\n",
				    current->comm, current->pid, address,
				    from_kuid(&init_user_ns, current_uid()));
		return;
	}

	/*
	 * For hash translation mode, we should never get a
	 * PROTFAULT. Any update to pte to reduce access will result in us
	 * removing the hash page table entry, thus resulting in a
	 * DSISR_NOHPTE fault instead of DSISR_PROTFAULT.
	 *
	 * A pte update to relax the access will not result in a hash page table
	 * entry invalidate and hence can result in DSISR_PROTFAULT.
	 * ptep_set_access_flags() doesn't do a hpte flush. This is why we have
	 * the special !is_write in the below conditional.
	 *
	 * For platforms that don't have a coherent icache but do have a
	 * per-page noexec bit, the D/I cache sync is done via a fault, but
	 * that is handled by the low level hash fault code
	 * (hash_page_do_lazy_icache()) and we should not reach here in that
	 * case.
	 *
	 * A wrong access that can result in a PROTFAULT is caught by the
	 * vma->vm_flags check and falls through to the bad_area handling,
	 * so anything else that still reaches this point on hash is
	 * converted to a one-time warning.
	 */
	if (radix_enabled() || is_write)
		return;

	WARN_ON_ONCE(error_code & DSISR_PROTFAULT);
}
#else
static void sanity_check_fault(bool is_write, bool is_user,
			       unsigned long error_code, unsigned long address) { }
#endif

/*
 * Define the correct "is_write" bit in error_code based
 * on the processor family
 */
#if (defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
#define page_fault_is_write(__err)	((__err) & ESR_DST)
#define page_fault_is_bad(__err)	(0)
#else
#define page_fault_is_write(__err)	((__err) & DSISR_ISSTORE)
#if defined(CONFIG_PPC_8xx)
#define page_fault_is_bad(__err)	((__err) & DSISR_NOEXEC_OR_G)
#elif defined(CONFIG_PPC64)
#define page_fault_is_bad(__err)	((__err) & DSISR_BAD_FAULT_64S)
#else
#define page_fault_is_bad(__err)	((__err) & DSISR_BAD_FAULT_32S)
#endif
#endif

/*
 * For 600- and 800-family processors, the error_code parameter is DSISR
 * for a data fault, SRR1 for an instruction fault. For 400-family processors
 * the error_code parameter is ESR for a data fault, 0 for an instruction
 * fault.
 * For 64-bit processors, the error_code parameter is
 *  - DSISR for a non-SLB data access fault,
 *  - SRR1 & 0x08000000 for a non-SLB instruction access fault
 *  - 0 any SLB fault.
 *
 * The return value is 0 if the fault was handled, or the signal
 * number if this is a kernel fault that can't be handled here.
 */
static int __do_page_fault(struct pt_regs *regs, unsigned long address,
			   unsigned long error_code)
{
	struct vm_area_struct *vma;
	struct mm_struct *mm = current->mm;
	unsigned int flags = FAULT_FLAG_DEFAULT;
	int is_exec = TRAP(regs) == 0x400;
	int is_user = user_mode(regs);
	int is_write = page_fault_is_write(error_code);
	vm_fault_t fault, major = 0;
	bool must_retry = false;
	bool kprobe_fault = kprobe_page_fault(regs, 11);

	if (unlikely(debugger_fault_handler(regs) || kprobe_fault))
		return 0;

	if (unlikely(page_fault_is_bad(error_code))) {
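		/* DSISR/ESR bits that mean the fault can't sensibly be handled. */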
		if (is_user) {
			_exception(SIGBUS, regs, BUS_OBJERR, address);
			return 0;
		}
		return SIGBUS;
	}

	/* Additional sanity check(s) */
	sanity_check_fault(is_write, is_user, error_code, address);

	/*
	 * The kernel should never take an execute fault nor should it
	 * take a page fault to a kernel address or a page fault to a user
	 * address outside of dedicated places
	 */
	if (unlikely(!is_user && bad_kernel_fault(regs, error_code, address, is_write)))
		return SIGSEGV;

	/*
	 * If we're in an interrupt, have no user context or are running
	 * in a region with pagefaults disabled then we must not take the fault
	 */
	if (unlikely(faulthandler_disabled() || !mm)) {
		if (is_user)
			printk_ratelimited(KERN_ERR "Page fault in user mode"
					   " with faulthandler_disabled()=%d"
					   " mm=%p\n",
					   faulthandler_disabled(), mm);
		return bad_area_nosemaphore(regs, address);
	}

	/* We restore the interrupt state now */
	if (!arch_irq_disabled_regs(regs))
		local_irq_enable();

	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);

	if (error_code & DSISR_KEYFAULT)
		return bad_key_fault_exception(regs, address,
					       get_mm_addr_key(mm, address));

	/*
	 * We want to do this outside mmap_sem, because reading code around nip
	 * can result in fault, which will cause a deadlock when called with
	 * mmap_sem held
	 */
	if (is_user)
		flags |= FAULT_FLAG_USER;
	if (is_write)
		flags |= FAULT_FLAG_WRITE;
	if (is_exec)
		flags |= FAULT_FLAG_INSTRUCTION;

	/* When running in the kernel we expect faults to occur only to
	 * addresses in user space.  All other faults represent errors in the
	 * kernel and should generate an OOPS.  Unfortunately, in the case of an
	 * erroneous fault occurring in a code path which already holds mmap_sem
	 * we will deadlock attempting to validate the fault against the
	 * address space.  Luckily the kernel only validly references user
	 * space from well defined areas of code, which are listed in the
	 * exceptions table.
	 *
	 * As the vast majority of faults will be valid we will only perform
	 * the source reference check when there is a possibility of a deadlock.
	 * Attempt to lock the address space, if we cannot we then validate the
	 * source.  If this is invalid we can skip the address space check,
	 * thus avoiding the deadlock.
	 */
	if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
		if (!is_user && !search_exception_tables(regs->nip))
			return bad_area_nosemaphore(regs, address);

retry:
		down_read(&mm->mmap_sem);
	} else {
		/*
		 * The above down_read_trylock() might have succeeded in
		 * which case we'll have missed the might_sleep() from
		 * down_read():
		 */
		might_sleep();
	}

	vma = find_vma(mm, address);
	if (unlikely(!vma))
		return bad_area(regs, address);
	if (likely(vma->vm_start <= address))
		goto good_area;
	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
		return bad_area(regs, address);

	/* The stack is being expanded, check if it's valid */
	if (unlikely(bad_stack_expansion(regs, address, vma, flags,
					 &must_retry))) {
		if (!must_retry)
			return bad_area(regs, address);

		up_read(&mm->mmap_sem);
		if (fault_in_pages_readable((const char __user *)regs->nip,
					    sizeof(unsigned int)))
			return bad_area_nosemaphore(regs, address);
		goto retry;
	}

	/* Try to expand it */
	if (unlikely(expand_stack(vma, address)))
		return bad_area(regs, address);

good_area:
	if (unlikely(access_error(is_write, is_exec, vma)))
		return bad_access(regs, address);

	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(vma, address, flags);

#ifdef CONFIG_PPC_MEM_KEYS
	/*
	 * we skipped checking for access error due to key earlier.
	 * Check that using handle_mm_fault error return.
	 */
	if (unlikely(fault & VM_FAULT_SIGSEGV) &&
	    !arch_vma_access_permitted(vma, is_write, is_exec, 0)) {

		int pkey = vma_pkey(vma);

		up_read(&mm->mmap_sem);
		return bad_key_fault_exception(regs, address, pkey);
	}
#endif /* CONFIG_PPC_MEM_KEYS */

	major |= fault & VM_FAULT_MAJOR;

	if (fault_signal_pending(fault, regs))
		return user_mode(regs) ? 0 : SIGBUS;

	/*
	 * Handle the retry right now, the mmap_sem has been released in that
	 * case.
	 */
	if (unlikely(fault & VM_FAULT_RETRY)) {
		if (flags & FAULT_FLAG_ALLOW_RETRY) {
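			/* Record that we've been through here once already. */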
			flags |= FAULT_FLAG_TRIED;
			goto retry;
		}
	}

	up_read(&current->mm->mmap_sem);

	if (unlikely(fault & VM_FAULT_ERROR))
		return mm_fault_error(regs, address, fault);

	/*
	 * Major/minor page fault accounting.
	 */
	if (major) {
		current->maj_flt++;
		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
		cmo_account_page_fault();
	} else {
		current->min_flt++;
		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
	}
	return 0;
}
NOKPROBE_SYMBOL(__do_page_fault);

int do_page_fault(struct pt_regs *regs, unsigned long address,
		  unsigned long error_code)
{
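	/* Record the user/kernel transition for context tracking. */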
	enum ctx_state prev_state = exception_enter();
	int rc = __do_page_fault(regs, address, error_code);
	exception_exit(prev_state);
	return rc;
}
NOKPROBE_SYMBOL(do_page_fault);

/*
 * bad_page_fault is called when we have a bad kernel page fault or
 * a bad kernel segment access (eg : user pointer dereferenced in
 * kernel mode). It prints the faulting address and dies, unless an
 * exception table fixup exists for the faulting instruction.
 */
void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
{
	const struct exception_table_entry *entry;
	int is_write = page_fault_is_write(regs->dsisr);

	/* Are we prepared to handle this fault?  */
	if ((entry = search_exception_tables(regs->nip)) != NULL) {
		regs->nip = extable_fixup(entry);
		return;
	}

	/* kernel has accessed a bad area */

	switch (TRAP(regs)) {
	case 0x300:
	case 0x380:
	case 0xe00:
		pr_alert("BUG: %s on %s at 0x%08lx\n",
			 regs->dar < PAGE_SIZE ? "Kernel NULL pointer dereference" :
			 "Unable to handle kernel data access",
			 is_write ? "write" : "read", regs->dar);
		break;
	case 0x400:
	case 0x480:
		pr_alert("BUG: Unable to handle kernel instruction fetch%s",
			 regs->nip < PAGE_SIZE ? " (NULL pointer?)\n" : "\n");
		break;
	case 0x600:
		pr_alert("BUG: Unable to handle kernel unaligned access at 0x%08lx\n",
			 regs->dar);
		break;
	default:
		pr_alert("BUG: Unable to handle unknown paging fault at 0x%08lx\n",
			 regs->dar);
		break;
	}
	printk(KERN_ALERT "Faulting instruction address: 0x%08lx\n",
		regs->nip);

	if (task_stack_end_corrupted(current))
		printk(KERN_ALERT "Thread overran stack, or stack corrupted\n");

	die("Kernel access of bad area", regs, sig);
}