1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18#include <linux/signal.h>
19#include <linux/sched.h>
20#include <linux/sched/task_stack.h>
21#include <linux/kernel.h>
22#include <linux/errno.h>
23#include <linux/string.h>
24#include <linux/types.h>
25#include <linux/ptrace.h>
26#include <linux/mman.h>
27#include <linux/mm.h>
28#include <linux/interrupt.h>
29#include <linux/highmem.h>
30#include <linux/extable.h>
31#include <linux/kprobes.h>
32#include <linux/kdebug.h>
33#include <linux/perf_event.h>
34#include <linux/ratelimit.h>
35#include <linux/context_tracking.h>
36#include <linux/hugetlb.h>
37#include <linux/uaccess.h>
38
39#include <asm/firmware.h>
40#include <asm/page.h>
41#include <asm/pgtable.h>
42#include <asm/mmu.h>
43#include <asm/mmu_context.h>
44#include <asm/tlbflush.h>
45#include <asm/siginfo.h>
46#include <asm/debug.h>
47
48static inline bool notify_page_fault(struct pt_regs *regs)
49{
50 bool ret = false;
51
52#ifdef CONFIG_KPROBES
53
54 if (!user_mode(regs)) {
55 preempt_disable();
56 if (kprobe_running() && kprobe_fault_handler(regs, 11))
57 ret = true;
58 preempt_enable();
59 }
60#endif
61
62 if (unlikely(debugger_fault_handler(regs)))
63 ret = true;
64
65 return ret;
66}
67
68
69
70
71
72static bool store_updates_sp(struct pt_regs *regs)
73{
74 unsigned int inst;
75
76 if (get_user(inst, (unsigned int __user *)regs->nip))
77 return false;
78
79 if (((inst >> 16) & 0x1f) != 1)
80 return false;
81
82 switch (inst >> 26) {
83 case 37:
84 case 39:
85 case 45:
86 case 53:
87 case 55:
88 return true;
89 case 62:
90 return (inst & 3) == 1;
91 case 31:
92
93 switch ((inst >> 1) & 0x3ff) {
94 case 181:
95 case 183:
96 case 247:
97 case 439:
98 case 695:
99 case 759:
100 return true;
101 }
102 }
103 return false;
104}
105
106
107
108
109static int
110__bad_area_nosemaphore(struct pt_regs *regs, unsigned long address, int si_code,
111 int pkey)
112{
113
114
115
116
117
118 if (!user_mode(regs))
119 return SIGSEGV;
120
121 _exception_pkey(SIGSEGV, regs, si_code, address, pkey);
122
123 return 0;
124}
125
126static noinline int bad_area_nosemaphore(struct pt_regs *regs, unsigned long address)
127{
128 return __bad_area_nosemaphore(regs, address, SEGV_MAPERR, 0);
129}
130
131static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code,
132 int pkey)
133{
134 struct mm_struct *mm = current->mm;
135
136
137
138
139
140 up_read(&mm->mmap_sem);
141
142 return __bad_area_nosemaphore(regs, address, si_code, pkey);
143}
144
145static noinline int bad_area(struct pt_regs *regs, unsigned long address)
146{
147 return __bad_area(regs, address, SEGV_MAPERR, 0);
148}
149
150static int bad_key_fault_exception(struct pt_regs *regs, unsigned long address,
151 int pkey)
152{
153 return __bad_area_nosemaphore(regs, address, SEGV_PKUERR, pkey);
154}
155
156static noinline int bad_access(struct pt_regs *regs, unsigned long address)
157{
158 return __bad_area(regs, address, SEGV_ACCERR, 0);
159}
160
161static int do_sigbus(struct pt_regs *regs, unsigned long address,
162 unsigned int fault)
163{
164 siginfo_t info;
165 unsigned int lsb = 0;
166
167 if (!user_mode(regs))
168 return SIGBUS;
169
170 current->thread.trap_nr = BUS_ADRERR;
171 info.si_signo = SIGBUS;
172 info.si_errno = 0;
173 info.si_code = BUS_ADRERR;
174 info.si_addr = (void __user *)address;
175#ifdef CONFIG_MEMORY_FAILURE
176 if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) {
177 pr_err("MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n",
178 current->comm, current->pid, address);
179 info.si_code = BUS_MCEERR_AR;
180 }
181
182 if (fault & VM_FAULT_HWPOISON_LARGE)
183 lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
184 if (fault & VM_FAULT_HWPOISON)
185 lsb = PAGE_SHIFT;
186#endif
187 info.si_addr_lsb = lsb;
188 force_sig_info(SIGBUS, &info, current);
189 return 0;
190}
191
/*
 * Handle a VM_FAULT_ERROR result from handle_mm_fault().  Called with
 * mmap_sem already released.  Returns 0 once a user-mode signal has
 * been raised, or a signal number for the kernel-mode caller to act on.
 */
static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault)
{
	/*
	 * Kernel-mode fault interrupted by SIGKILL: give up immediately
	 * and let the caller unwind (user-mode fatal signals fall through
	 * to the normal error paths below).
	 */
	if (fatal_signal_pending(current) && !user_mode(regs))
		return SIGKILL;

	/* Out of memory */
	if (fault & VM_FAULT_OOM) {
		/* Kernel-mode OOM is reported back as SIGSEGV. */
		if (!user_mode(regs))
			return SIGSEGV;
		/* Let the OOM killer pick a victim (possibly us). */
		pagefault_out_of_memory();
	} else {
		if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
			     VM_FAULT_HWPOISON_LARGE))
			return do_sigbus(regs, addr, fault);
		else if (fault & VM_FAULT_SIGSEGV)
			return bad_area_nosemaphore(regs, addr);
		else
			/* No other VM_FAULT_ERROR bit should be possible. */
			BUG();
	}
	return 0;
}
221
222
223static bool bad_kernel_fault(bool is_exec, unsigned long error_code,
224 unsigned long address)
225{
226 if (is_exec && (error_code & (DSISR_NOEXEC_OR_G | DSISR_KEYFAULT))) {
227 printk_ratelimited(KERN_CRIT "kernel tried to execute"
228 " exec-protected page (%lx) -"
229 "exploit attempt? (uid: %d)\n",
230 address, from_kuid(&init_user_ns,
231 current_uid()));
232 }
233 return is_exec || (address >= TASK_SIZE);
234}
235
236static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address,
237 struct vm_area_struct *vma,
238 bool store_update_sp)
239{
240
241
242
243
244
245
246
247
248
249 if (address + 0x100000 < vma->vm_end) {
250
251 struct pt_regs *uregs = current->thread.regs;
252 if (uregs == NULL)
253 return true;
254
255
256
257
258
259
260
261
262
263
264
265
266
267 if (address + 2048 < uregs->gpr[1] && !store_update_sp)
268 return true;
269 }
270 return false;
271}
272
273static bool access_error(bool is_write, bool is_exec,
274 struct vm_area_struct *vma)
275{
276
277
278
279
280
281
282
283
284
285
286 if (is_exec) {
287 return !(vma->vm_flags & VM_EXEC) &&
288 (cpu_has_feature(CPU_FTR_NOEXECUTE) ||
289 !(vma->vm_flags & (VM_READ | VM_WRITE)));
290 }
291
292 if (is_write) {
293 if (unlikely(!(vma->vm_flags & VM_WRITE)))
294 return true;
295 return false;
296 }
297
298 if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
299 return true;
300
301
302
303
304
305
306 return false;
307}
308
#ifdef CONFIG_PPC_SMLPAR
/*
 * Account a page-in to the hypervisor's Cooperative Memory
 * Overcommitment (CMO) counter.  The lppaca page_ins field is
 * big-endian; preemption is disabled so get_lppaca() refers to the
 * same per-CPU area across the read-modify-write.
 */
static inline void cmo_account_page_fault(void)
{
	if (firmware_has_feature(FW_FEATURE_CMO)) {
		u32 page_ins;

		preempt_disable();
		page_ins = be32_to_cpu(get_lppaca()->page_ins);
		page_ins += 1 << PAGE_FACTOR;
		get_lppaca()->page_ins = cpu_to_be32(page_ins);
		preempt_enable();
	}
}
#else
/* No CMO accounting outside shared-memory LPAR configurations. */
static inline void cmo_account_page_fault(void) { }
#endif
325
#ifdef CONFIG_PPC_STD_MMU
/*
 * Sanity-check the error code against the fault direction.  On hash
 * MMUs, warn once if a read access reports DSISR_PROTFAULT — the check
 * excludes radix (radix_enabled()) and writes, so those are evidently
 * cases where a protection fault is legitimate.
 * NOTE(review): the original explanatory comment block was stripped
 * from this copy; confirm the precise rationale against upstream
 * arch/powerpc/mm/fault.c before relying on it.
 */
static void sanity_check_fault(bool is_write, unsigned long error_code)
{
	if (!radix_enabled() && !is_write)
		WARN_ON_ONCE(error_code & DSISR_PROTFAULT);
}
#else
/* No DSISR-based sanity checking on non-hash MMUs. */
static void sanity_check_fault(bool is_write, unsigned long error_code) { }
#endif
364
365
366
367
368
/*
 * Per-MMU-family fault decoding: 4xx/BookE report the fault in the ESR
 * (and have no "bad" bits to screen), everything else in the DSISR,
 * with the set of always-invalid bits differing per platform.
 */
#if (defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
#define page_fault_is_write(__err) ((__err) & ESR_DST)
#define page_fault_is_bad(__err) (0)
#else
#define page_fault_is_write(__err) ((__err) & DSISR_ISSTORE)
#if defined(CONFIG_PPC_8xx)
#define page_fault_is_bad(__err) ((__err) & DSISR_NOEXEC_OR_G)
#elif defined(CONFIG_PPC64)
#define page_fault_is_bad(__err) ((__err) & DSISR_BAD_FAULT_64S)
#else
#define page_fault_is_bad(__err) ((__err) & DSISR_BAD_FAULT_32S)
#endif
#endif
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396static int __do_page_fault(struct pt_regs *regs, unsigned long address,
397 unsigned long error_code)
398{
399 struct vm_area_struct * vma;
400 struct mm_struct *mm = current->mm;
401 unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
402 int is_exec = TRAP(regs) == 0x400;
403 int is_user = user_mode(regs);
404 int is_write = page_fault_is_write(error_code);
405 int fault, major = 0;
406 bool store_update_sp = false;
407
408 if (notify_page_fault(regs))
409 return 0;
410
411 if (unlikely(page_fault_is_bad(error_code))) {
412 if (is_user) {
413 _exception(SIGBUS, regs, BUS_OBJERR, address);
414 return 0;
415 }
416 return SIGBUS;
417 }
418
419
420 sanity_check_fault(is_write, error_code);
421
422
423
424
425
426 if (unlikely(!is_user && bad_kernel_fault(is_exec, error_code, address)))
427 return SIGSEGV;
428
429
430
431
432
433 if (unlikely(faulthandler_disabled() || !mm)) {
434 if (is_user)
435 printk_ratelimited(KERN_ERR "Page fault in user mode"
436 " with faulthandler_disabled()=%d"
437 " mm=%p\n",
438 faulthandler_disabled(), mm);
439 return bad_area_nosemaphore(regs, address);
440 }
441
442
443 if (!arch_irq_disabled_regs(regs))
444 local_irq_enable();
445
446 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
447
448 if (error_code & DSISR_KEYFAULT)
449 return bad_key_fault_exception(regs, address,
450 get_mm_addr_key(mm, address));
451
452
453
454
455
456
457 if (is_write && is_user)
458 store_update_sp = store_updates_sp(regs);
459
460 if (is_user)
461 flags |= FAULT_FLAG_USER;
462 if (is_write)
463 flags |= FAULT_FLAG_WRITE;
464 if (is_exec)
465 flags |= FAULT_FLAG_INSTRUCTION;
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482 if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
483 if (!is_user && !search_exception_tables(regs->nip))
484 return bad_area_nosemaphore(regs, address);
485
486retry:
487 down_read(&mm->mmap_sem);
488 } else {
489
490
491
492
493
494 might_sleep();
495 }
496
497 vma = find_vma(mm, address);
498 if (unlikely(!vma))
499 return bad_area(regs, address);
500 if (likely(vma->vm_start <= address))
501 goto good_area;
502 if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
503 return bad_area(regs, address);
504
505
506 if (unlikely(bad_stack_expansion(regs, address, vma, store_update_sp)))
507 return bad_area(regs, address);
508
509
510 if (unlikely(expand_stack(vma, address)))
511 return bad_area(regs, address);
512
513good_area:
514 if (unlikely(access_error(is_write, is_exec, vma)))
515 return bad_access(regs, address);
516
517
518
519
520
521
522 fault = handle_mm_fault(vma, address, flags);
523
524#ifdef CONFIG_PPC_MEM_KEYS
525
526
527
528
529 if (unlikely(fault & VM_FAULT_SIGSEGV) &&
530 !arch_vma_access_permitted(vma, is_write, is_exec, 0)) {
531
532 int pkey = vma_pkey(vma);
533
534 up_read(&mm->mmap_sem);
535 return bad_key_fault_exception(regs, address, pkey);
536 }
537#endif
538
539 major |= fault & VM_FAULT_MAJOR;
540
541
542
543
544
545 if (unlikely(fault & VM_FAULT_RETRY)) {
546
547 if (flags & FAULT_FLAG_ALLOW_RETRY) {
548
549
550
551
552 flags &= ~FAULT_FLAG_ALLOW_RETRY;
553 flags |= FAULT_FLAG_TRIED;
554 if (!fatal_signal_pending(current))
555 goto retry;
556 }
557
558
559
560
561
562 return is_user ? 0 : SIGBUS;
563 }
564
565 up_read(¤t->mm->mmap_sem);
566
567 if (unlikely(fault & VM_FAULT_ERROR))
568 return mm_fault_error(regs, address, fault);
569
570
571
572
573 if (major) {
574 current->maj_flt++;
575 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
576 cmo_account_page_fault();
577 } else {
578 current->min_flt++;
579 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
580 }
581 return 0;
582}
583NOKPROBE_SYMBOL(__do_page_fault);
584
585int do_page_fault(struct pt_regs *regs, unsigned long address,
586 unsigned long error_code)
587{
588 enum ctx_state prev_state = exception_enter();
589 int rc = __do_page_fault(regs, address, error_code);
590 exception_exit(prev_state);
591 return rc;
592}
593NOKPROBE_SYMBOL(do_page_fault);
594
595
596
597
598
599
/*
 * Called when a kernel-mode fault could not be resolved: if an
 * exception table fixup exists for the faulting instruction, branch
 * to it; otherwise print diagnostics keyed off the trap type and die.
 */
void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
{
	const struct exception_table_entry *entry;

	/* Are we prepared to handle this fault? */
	if ((entry = search_exception_tables(regs->nip)) != NULL) {
		regs->nip = extable_fixup(entry);
		return;
	}

	/* Kernel has accessed a bad area — describe it by trap type. */
	switch (TRAP(regs)) {
	case 0x300:	/* data access */
	case 0x380:	/* data SLB */
		printk(KERN_ALERT "Unable to handle kernel paging request for "
			"data at address 0x%08lx\n", regs->dar);
		break;
	case 0x400:	/* instruction access */
	case 0x480:	/* instruction SLB */
		printk(KERN_ALERT "Unable to handle kernel paging request for "
			"instruction fetch\n");
		break;
	case 0x600:	/* alignment */
		printk(KERN_ALERT "Unable to handle kernel paging request for "
			"unaligned access at address 0x%08lx\n", regs->dar);
		break;
	default:
		printk(KERN_ALERT "Unable to handle kernel paging request for "
			"unknown fault\n");
		break;
	}
	printk(KERN_ALERT "Faulting instruction address: 0x%08lx\n",
		regs->nip);

	if (task_stack_end_corrupted(current))
		printk(KERN_ALERT "Thread overran stack, or stack corrupted\n");

	die("Kernel access of bad area", regs, sig);
}
640