// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  PowerPC version
 *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 *  Derived from "arch/i386/mm/fault.c"
 *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *
 *  Modified by Cort Dougan and Paul Mackerras.
 *
 *  Modified for PPC64 by Dave Engebretsen (engebret@ibm.com)
 */
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/pagemap.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/highmem.h>
#include <linux/extable.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/perf_event.h>
#include <linux/ratelimit.h>
#include <linux/context_tracking.h>
#include <linux/hugetlb.h>
#include <linux/uaccess.h>
#include <linux/kfence.h>
#include <linux/pkeys.h>

#include <asm/firmware.h>
#include <asm/interrupt.h>
#include <asm/page.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/siginfo.h>
#include <asm/debug.h>
#include <asm/kup.h>
#include <asm/inst.h>

/*
 * do_page_fault() error handling. These helpers return 0 once a signal
 * has been delivered to a user task, or the signal number the caller
 * must treat as an unhandled kernel fault.
 */
static int
__bad_area_nosemaphore(struct pt_regs *regs, unsigned long address, int si_code)
{
	/*
	 * If we are in kernel mode, bail out with a SEGV, this will
	 * be caught by the assembly which will restore the non-volatile
	 * registers before calling bad_page_fault()
	 */
	if (!user_mode(regs))
		return SIGSEGV;

	_exception(SIGSEGV, regs, si_code, address);

	return 0;
}

static noinline int bad_area_nosemaphore(struct pt_regs *regs, unsigned long address)
{
	return __bad_area_nosemaphore(regs, address, SEGV_MAPERR);
}

static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code)
{
	struct mm_struct *mm = current->mm;

	/*
	 * Something tried to access memory that isn't in our memory map.
	 * Drop mmap_lock, then decide whether it is a kernel or user fault.
	 */
	mmap_read_unlock(mm);

	return __bad_area_nosemaphore(regs, address, si_code);
}

static noinline int bad_area(struct pt_regs *regs, unsigned long address)
{
	return __bad_area(regs, address, SEGV_MAPERR);
}

static noinline int bad_access_pkey(struct pt_regs *regs, unsigned long address,
				    struct vm_area_struct *vma)
{
	struct mm_struct *mm = current->mm;
	int pkey;

	/*
	 * We don't try to fetch the pkey from the page table because reading
	 * the page table without locking doesn't guarantee a stable pte value.
	 * Hence the pkey value that we return to userspace can be different
	 * from the pkey that actually faulted.
	 *
	 * It also does *not* guarantee that the VMA we find here
	 * was the one that we faulted on:
	 *
	 * 1. T1   : mprotect_key(foo, PAGE_SIZE, pkey=4);
	 * 2. T1   : set AMR to deny access to pkey=4, touches page
	 * 3. T1   : faults...
	 * 4.    T2: mprotect_key(foo, PAGE_SIZE, pkey=5);
	 * 5. T1   : enters fault handler, takes mmap_lock, etc...
	 * 6. T1   : reaches here, sees vma_pkey(vma)=5, when we really
	 *	     faulted on a pte with its pkey=4.
	 */
	pkey = vma_pkey(vma);

	mmap_read_unlock(mm);

	/*
	 * If we are in kernel mode, bail out with a SEGV, this will
	 * be caught by the assembly which will restore the non-volatile
	 * registers before calling bad_page_fault()
	 */
	if (!user_mode(regs))
		return SIGSEGV;

	_exception_pkey(regs, address, pkey);

	return 0;
}

static noinline int bad_access(struct pt_regs *regs, unsigned long address)
{
	return __bad_area(regs, address, SEGV_ACCERR);
}

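/*
 * Deliver SIGBUS for a bad user access, carrying precise poison details
 * (BUS_MCEERR_AR and the poison granularity) for memory-failure faults.
 */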
static int do_sigbus(struct pt_regs *regs, unsigned long address,
		     vm_fault_t fault)
{
	if (!user_mode(regs))
		return SIGBUS;

	current->thread.trap_nr = BUS_ADRERR;
#ifdef CONFIG_MEMORY_FAILURE
	if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) {
		unsigned int lsb = 0; /* shift information */

		pr_err("MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n",
		       current->comm, current->pid, address);

		/*
		 * Either a small page or a large page may be poisoned:
		 * VM_FAULT_HWPOISON_LARGE and VM_FAULT_HWPOISON are
		 * mutually exclusive.
		 */
		if (fault & VM_FAULT_HWPOISON_LARGE)
			lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
		if (fault & VM_FAULT_HWPOISON)
			lsb = PAGE_SHIFT;

		force_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb);
		return 0;
	}

#endif
	force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);
	return 0;
}

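/*
 * Turn an error from handle_mm_fault() (OOM, SIGBUS, HWPOISON, SIGSEGV)
 * into a signal for a user fault, or into a signal number for the
 * caller's kernel-fault path.
 */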
static int mm_fault_error(struct pt_regs *regs, unsigned long addr,
			  vm_fault_t fault)
{
	/*
	 * Kernel page fault interrupted by SIGKILL. We have no reason to
	 * continue processing.
	 */
	if (fatal_signal_pending(current) && !user_mode(regs))
		return SIGKILL;

	/* Out of memory */
	if (fault & VM_FAULT_OOM) {
		/*
		 * We ran out of memory, or some other thing happened to us
		 * that made us unable to handle the page fault gracefully.
		 */
		if (!user_mode(regs))
			return SIGSEGV;
		pagefault_out_of_memory();
	} else {
		if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
			     VM_FAULT_HWPOISON_LARGE))
			return do_sigbus(regs, addr, fault);
		else if (fault & VM_FAULT_SIGSEGV)
			return bad_area_nosemaphore(regs, addr);
		else
			BUG();
	}
	return 0;
}

/* Is this a bad kernel fault ? */
static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
			     unsigned long address, bool is_write)
{
	int is_exec = TRAP(regs) == INTERRUPT_INST_STORAGE;

	if (is_exec) {
		pr_crit_ratelimited("kernel tried to execute %s page (%lx) - exploit attempt? (uid: %d)\n",
				    address >= TASK_SIZE ? "exec-protected" : "user",
				    address,
				    from_kuid(&init_user_ns, current_uid()));

		// Kernel exec fault is always bad
		return true;
	}

	// Kernel fault on kernel address is bad
	if (address >= TASK_SIZE)
		return true;

	// Read/write fault blocked by KUAP is bad, it can never succeed.
	if (bad_kuap_fault(regs, address, is_write)) {
		pr_crit_ratelimited("Kernel attempted to %s user page (%lx) - exploit attempt? (uid: %d)\n",
				    is_write ? "write" : "read", address,
				    from_kuid(&init_user_ns, current_uid()));

		// Fault on user outside of certain regions (eg. copy_tofrom_user()) is bad
		if (!search_exception_tables(regs->nip))
			return true;

		// Read/write fault in a valid region (the exception table search passed
		// above), but blocked by KUAP is bad, it can never succeed.
		return WARN(true, "Bug: %s fault blocked by KUAP!", is_write ? "Write" : "Read");
	}

	// What's left? Kernel fault on user address, allowed by KUAP in the faulting context.
	return false;
}

static bool access_pkey_error(bool is_write, bool is_exec, bool is_pkey,
			      struct vm_area_struct *vma)
{
	/*
	 * Make sure to check the VMA so that we do not perform
	 * faults just to hit a pkey fault as soon as we fill in a
	 * page. Only called for current mm, hence foreign == 0
	 */
	if (!arch_vma_access_permitted(vma, is_write, is_exec, 0))
		return true;

	return false;
}

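/*
 * Return true if the VMA's protection flags forbid the faulting access.
 */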
static bool access_error(bool is_write, bool is_exec, struct vm_area_struct *vma)
{
	/*
	 * Allow execution from readable areas if the MMU does not
	 * provide separate controls over reading and executing.
	 *
	 * Note: That code used to not be enabled for 4xx/BookE.
	 * It is now as I/D cache coherency for these is done at
	 * set_pte_at() time and I see no reason why the test
	 * below would be still valid on those processors.
	 */
	if (is_exec) {
		return !(vma->vm_flags & VM_EXEC) &&
			(cpu_has_feature(CPU_FTR_NOEXECUTE) ||
			 !(vma->vm_flags & (VM_READ | VM_WRITE)));
	}

	if (is_write) {
		if (unlikely(!(vma->vm_flags & VM_WRITE)))
			return true;
		return false;
	}

	if (unlikely(!vma_is_accessible(vma)))
		return true;

	/*
	 * We should ideally do the vma pkey access check here. But in the
	 * fault path, handle_mm_fault() also does the same check. To avoid
	 * these multiple checks, we skip it here and handle access error due
	 * to pkeys later.
	 */
	return false;
}

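/*
 * With Cooperative Memory Overcommitment (CMO) firmware support, count
 * page-ins in the lppaca so the hypervisor can track paging activity.
 */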
#ifdef CONFIG_PPC_SMLPAR
static inline void cmo_account_page_fault(void)
{
	if (firmware_has_feature(FW_FEATURE_CMO)) {
		u32 page_ins;

		preempt_disable();
		page_ins = be32_to_cpu(get_lppaca()->page_ins);
		page_ins += 1 << PAGE_FACTOR;
		get_lppaca()->page_ins = cpu_to_be32(page_ins);
		preempt_enable();
	}
}
#else
static inline void cmo_account_page_fault(void) { }
#endif

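/*
 * Pre-handling sanity checks: warn about suspicious user accesses to
 * kernel addresses and about unexpected protection faults on hash MMUs.
 */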
static void sanity_check_fault(bool is_write, bool is_user,
			       unsigned long error_code, unsigned long address)
{
	/*
	 * Userspace trying to access kernel memory, warn user
	 * about it.
	 */
	if (is_user && address >= TASK_SIZE) {
		if ((long)address == -1)
			return;

		pr_crit_ratelimited("%s[%d]: User access of kernel address (%lx) - exploit attempt? (uid: %d)\n",
				    current->comm, current->pid, address,
				    from_kuid(&init_user_ns, current_uid()));
		return;
	}

	if (!IS_ENABLED(CONFIG_PPC_BOOK3S))
		return;

	/*
	 * In hash translation mode a read fault should never report
	 * DSISR_PROTFAULT: restricting access invalidates the hash page
	 * table entry, so the retry faults with DSISR_NOHPTE instead.
	 * Radix and write faults can legitimately see protection faults,
	 * so only warn for hash reads.
	 */
	if (radix_enabled() || is_write)
		return;

	WARN_ON_ONCE(error_code & DSISR_PROTFAULT);
}

/*
 * Define the correct "is_write" bit in error_code based
 * on the processor family.
 */
#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
#define page_fault_is_write(__err)	((__err) & ESR_DST)
#else
#define page_fault_is_write(__err)	((__err) & DSISR_ISSTORE)
#endif

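/*
 * Select the error_code bits that mark a fault as invalid regardless of
 * the VMA, again based on the processor family.
 */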
#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
#define page_fault_is_bad(__err)	(0)
#elif defined(CONFIG_PPC_8xx)
#define page_fault_is_bad(__err)	((__err) & DSISR_NOEXEC_OR_G)
#elif defined(CONFIG_PPC64)
#define page_fault_is_bad(__err)	((__err) & DSISR_BAD_FAULT_64S)
#else
#define page_fault_is_bad(__err)	((__err) & DSISR_BAD_FAULT_32S)
#endif

/*
 * For 600- and 800-family processors, the error_code parameter is DSISR
 * for a data fault, SRR1 for an instruction fault.
 * For 400-family processors the error_code parameter is ESR for a data
 * fault, 0 for an instruction fault.
 * For 64-bit processors, the error_code parameter is DSISR for a data
 * access fault, SRR1 & 0x08000000 for an instruction access fault.
 *
 * The return value is 0 if the fault was handled, or the signal
 * number if this is a kernel fault that can't be handled here.
 */
static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
			    unsigned long error_code)
{
	struct vm_area_struct *vma;
	struct mm_struct *mm = current->mm;
	unsigned int flags = FAULT_FLAG_DEFAULT;
	int is_exec = TRAP(regs) == INTERRUPT_INST_STORAGE;
	int is_user = user_mode(regs);
	int is_write = page_fault_is_write(error_code);
	vm_fault_t fault, major = 0;
	bool kprobe_fault = kprobe_page_fault(regs, 11);

	if (unlikely(debugger_fault_handler(regs) || kprobe_fault))
		return 0;

	if (unlikely(page_fault_is_bad(error_code))) {
		if (is_user) {
			_exception(SIGBUS, regs, BUS_OBJERR, address);
			return 0;
		}
		return SIGBUS;
	}

	/* Additional sanity check(s) */
	sanity_check_fault(is_write, is_user, error_code, address);

	/*
	 * The kernel should never take an execute fault nor should it
	 * take a page fault to a kernel address or a page fault to a user
	 * address outside of dedicated places.
	 */
	if (unlikely(!is_user && bad_kernel_fault(regs, error_code, address, is_write))) {
		if (kfence_handle_page_fault(address, is_write, regs))
			return 0;

		return SIGSEGV;
	}

	/*
	 * If we're in an interrupt, have no user context or are running
	 * in a region with pagefaults disabled then we must not take the fault.
	 */
	if (unlikely(faulthandler_disabled() || !mm)) {
		if (is_user)
			printk_ratelimited(KERN_ERR "Page fault in user mode"
					   " with faulthandler_disabled()=%d"
					   " mm=%p\n",
					   faulthandler_disabled(), mm);
		return bad_area_nosemaphore(regs, address);
	}

	interrupt_cond_local_irq_enable(regs);

	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);

	/*
	 * We want to do this outside mmap_lock, because reading code around nip
	 * can result in fault, which will cause a deadlock when called with
	 * mmap_lock held.
	 */
	if (is_user)
		flags |= FAULT_FLAG_USER;
	if (is_write)
		flags |= FAULT_FLAG_WRITE;
	if (is_exec)
		flags |= FAULT_FLAG_INSTRUCTION;

	/*
	 * When running in the kernel we expect faults to occur only to
	 * addresses in user space.  All other faults represent errors in the
	 * kernel and should generate an OOPS.  Unfortunately, in the case of an
	 * erroneous fault occurring in a code path which already holds mmap_lock
	 * we will deadlock attempting to validate the fault against the
	 * address space.  Luckily the kernel only validly references user
	 * space from well defined areas of code, which are listed in the
	 * exceptions table.
	 *
	 * As the vast majority of faults will be valid we will only perform
	 * the source reference check when there is a possibility of a deadlock.
	 * Attempt to lock the address space, if we cannot we then validate the
	 * source.  If this is invalid we can skip the address space check,
	 * thus avoiding the deadlock.
	 */
	if (unlikely(!mmap_read_trylock(mm))) {
		if (!is_user && !search_exception_tables(regs->nip))
			return bad_area_nosemaphore(regs, address);

retry:
		mmap_read_lock(mm);
	} else {
		/*
		 * The above down_read_trylock() might have succeeded in
		 * which case we'll have missed the might_sleep() from
		 * down_read():
		 */
		might_sleep();
	}

	vma = find_vma(mm, address);
	if (unlikely(!vma))
		return bad_area(regs, address);

	if (unlikely(vma->vm_start > address)) {
		if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
			return bad_area(regs, address);

		if (unlikely(expand_stack(vma, address)))
			return bad_area(regs, address);
	}

	if (unlikely(access_pkey_error(is_write, is_exec,
				       (error_code & DSISR_KEYFAULT), vma)))
		return bad_access_pkey(regs, address, vma);

	if (unlikely(access_error(is_write, is_exec, vma)))
		return bad_access(regs, address);

	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(vma, address, flags, regs);

	major |= fault & VM_FAULT_MAJOR;

	if (fault_signal_pending(fault, regs))
		return user_mode(regs) ? 0 : SIGBUS;

	/*
	 * Handle the retry right now, the mmap_lock has been released
	 * in that case.
	 */
	if (unlikely(fault & VM_FAULT_RETRY)) {
		if (flags & FAULT_FLAG_ALLOW_RETRY) {
			flags |= FAULT_FLAG_TRIED;
			goto retry;
		}
	}

	mmap_read_unlock(current->mm);

	if (unlikely(fault & VM_FAULT_ERROR))
		return mm_fault_error(regs, address, fault);

	/*
	 * Major/minor page fault accounting.
	 */
	if (major)
		cmo_account_page_fault();

	return 0;
}
NOKPROBE_SYMBOL(___do_page_fault);

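/*
 * Fetch the fault address and code from pt_regs, run the common handler,
 * and hand any unhandled kernel fault to bad_page_fault().
 */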
static __always_inline void __do_page_fault(struct pt_regs *regs)
{
	long err;

	err = ___do_page_fault(regs, regs->dar, regs->dsisr);
	if (unlikely(err))
		bad_page_fault(regs, err);
}

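/* Page fault interrupt entry point. */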
DEFINE_INTERRUPT_HANDLER(do_page_fault)
{
	__do_page_fault(regs);
}

#ifdef CONFIG_PPC_BOOK3S_64
/* Same as do_page_fault but interrupt entry has already run in do_hash_fault() */
void hash__do_page_fault(struct pt_regs *regs)
{
	__do_page_fault(regs);
}
NOKPROBE_SYMBOL(hash__do_page_fault);
#endif

/*
 * bad_page_fault is called when we have a bad kernel page fault.
 */
static void __bad_page_fault(struct pt_regs *regs, int sig)
{
	int is_write = page_fault_is_write(regs->dsisr);

	/* kernel has accessed a bad area */

	switch (TRAP(regs)) {
	case INTERRUPT_DATA_STORAGE:
	case INTERRUPT_DATA_SEGMENT:
	case INTERRUPT_H_DATA_STORAGE:
		pr_alert("BUG: %s on %s at 0x%08lx\n",
			 regs->dar < PAGE_SIZE ? "Kernel NULL pointer dereference" :
			 "Unable to handle kernel data access",
			 is_write ? "write" : "read", regs->dar);
		break;
	case INTERRUPT_INST_STORAGE:
	case INTERRUPT_INST_SEGMENT:
		pr_alert("BUG: Unable to handle kernel instruction fetch%s",
			 regs->nip < PAGE_SIZE ? " (NULL pointer?)\n" : "\n");
		break;
	case INTERRUPT_ALIGNMENT:
		pr_alert("BUG: Unable to handle kernel unaligned access at 0x%08lx\n",
			 regs->dar);
		break;
	default:
		pr_alert("BUG: Unable to handle unknown paging fault at 0x%08lx\n",
			 regs->dar);
		break;
	}
	printk(KERN_ALERT "Faulting instruction address: 0x%08lx\n",
	       regs->nip);

	if (task_stack_end_corrupted(current))
		printk(KERN_ALERT "Thread overran stack, or stack corrupted\n");

	die("Kernel access of bad area", regs, sig);
}

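/*
 * Fix up a kernel fault via the exception tables when possible;
 * otherwise report the bad access and die.
 */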
void bad_page_fault(struct pt_regs *regs, int sig)
{
	const struct exception_table_entry *entry;

	/* Are we prepared to handle this fault?  */
	entry = search_exception_tables(instruction_pointer(regs));
	if (entry)
		instruction_pointer_set(regs, extable_fixup(entry));
	else
		__bad_page_fault(regs, sig);
}

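/*
 * Interrupt entry point for faults that are already known to be bad;
 * reports them via bad_page_fault() and raises SIGSEGV.
 */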
#ifdef CONFIG_PPC_BOOK3S_64
DEFINE_INTERRUPT_HANDLER(do_bad_page_fault_segv)
{
	bad_page_fault(regs, SIGSEGV);
}
#endif