#include <linux/init.h>

#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/export.h>
#include <linux/cpu.h>
#include <linux/debugfs.h>

#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/nospec-branch.h>
#include <asm/cache.h>
#include <asm/apic.h>

/*
 * TLB flushing and mm context switching for x86: per-CPU TLB state,
 * PCID/ASID management, lazy TLB handling and cross-CPU TLB shootdowns.
 */

/*
 * Bit 0 is used to encode the TIF_SPEC_IB state of the task that last ran
 * in user space into the mm pointer stored in
 * cpu_tlbstate.last_user_mm_ibpb (see mm_mangle_tif_spec_ib()).
 */
#define LAST_USER_MM_IBPB 0x1UL

/*
 * We get here when we need a TLB invalidation but could not invalidate
 * all of the contexts on this CPU.  Clearing ctx_id for every dynamic
 * ASID other than the currently loaded one forces a flush the next time
 * any of those contexts is switched to.
 */
void clear_asid_other(void)
{
        u16 asid;

        /*
         * invalidate_other is only expected to be set when PTI is enabled;
         * without PTI there are no user ASIDs to invalidate.
         */
        if (!static_cpu_has(X86_FEATURE_PTI)) {
                WARN_ON_ONCE(1);
                return;
        }

        for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
                /* There is no need to invalidate the currently loaded ASID. */
                if (asid == this_cpu_read(cpu_tlbstate.loaded_mm_asid))
                        continue;

                /*
                 * Make sure the next time we go to switch to
                 * this ASID, we do a flush.
                 */
                this_cpu_write(cpu_tlbstate.ctxs[asid].ctx_id, 0);
        }
        this_cpu_write(cpu_tlbstate.invalidate_other, false);
}

atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);

static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
                            u16 *new_asid, bool *need_flush)
{
        u16 asid;

        if (!static_cpu_has(X86_FEATURE_PCID)) {
                *new_asid = 0;
                *need_flush = true;
                return;
        }

        if (this_cpu_read(cpu_tlbstate.invalidate_other))
                clear_asid_other();

        for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
                if (this_cpu_read(cpu_tlbstate.ctxs[asid].ctx_id) !=
                    next->context.ctx_id)
                        continue;

                *new_asid = asid;
                *need_flush = (this_cpu_read(cpu_tlbstate.ctxs[asid].tlb_gen) <
                               next_tlb_gen);
                return;
        }

        /*
         * We don't currently own an ASID slot on this CPU.
         * Allocate the next slot round-robin; switching to it needs a flush.
         */
        *new_asid = this_cpu_add_return(cpu_tlbstate.next_asid, 1) - 1;
        if (*new_asid >= TLB_NR_DYN_ASIDS) {
                *new_asid = 0;
                this_cpu_write(cpu_tlbstate.next_asid, 1);
        }
        *need_flush = true;
}

static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush)
{
        unsigned long new_mm_cr3;

        if (need_flush) {
                invalidate_user_asid(new_asid);
                new_mm_cr3 = build_cr3(pgdir, new_asid);
        } else {
                new_mm_cr3 = build_cr3_noflush(pgdir, new_asid);
        }

        /*
         * Writing CR3 is architecturally serializing, so it orders the TLB
         * fills for the new context after the bookkeeping done by callers.
         */
        write_cr3(new_mm_cr3);
}

void leave_mm(int cpu)
{
        struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);

        /*
         * If the loaded mm is init_mm there are no user TLB entries to
         * worry about, so there is nothing to do.
         */
        if (loaded_mm == &init_mm)
                return;

        /* Warn if we're not lazy. */
        WARN_ON(!this_cpu_read(cpu_tlbstate.is_lazy));

        switch_mm(NULL, &init_mm, NULL);
}
EXPORT_SYMBOL_GPL(leave_mm);
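
/* Interrupt-safe wrapper: run switch_mm_irqs_off() with interrupts disabled. */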
void switch_mm(struct mm_struct *prev, struct mm_struct *next,
               struct task_struct *tsk)
{
        unsigned long flags;

        local_irq_save(flags);
        switch_mm_irqs_off(prev, next, tsk);
        local_irq_restore(flags);
}

static void sync_current_stack_to_mm(struct mm_struct *mm)
{
        unsigned long sp = current_stack_pointer;
        pgd_t *pgd = pgd_offset(mm, sp);

        if (pgtable_l5_enabled()) {
                if (unlikely(pgd_none(*pgd))) {
                        pgd_t *pgd_ref = pgd_offset_k(sp);

                        set_pgd(pgd, *pgd_ref);
                }
        } else {
                /*
                 * With 4-level paging the p4d level is folded into the pgd,
                 * so pgd_none() is never true here.  Synchronize the stack
                 * mapping at the p4d level instead.
                 */
                p4d_t *p4d = p4d_offset(pgd, sp);

                if (unlikely(p4d_none(*p4d))) {
                        pgd_t *pgd_ref = pgd_offset_k(sp);
                        p4d_t *p4d_ref = p4d_offset(pgd_ref, sp);

                        set_p4d(p4d, *p4d_ref);
                }
        }
}
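
/*
 * Fold the TIF_SPEC_IB state of the incoming task into bit 0 of its mm
 * pointer (LAST_USER_MM_IBPB) so that cond_ibpb() can compare both the
 * mm and the IB state of the previous and next task in one go.
 */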
static inline unsigned long mm_mangle_tif_spec_ib(struct task_struct *next)
{
        unsigned long next_tif = task_thread_info(next)->flags;
        unsigned long ibpb = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_IBPB;

        return (unsigned long)next->mm | ibpb;
}

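/*
 * Avoid user->user BTB poisoning (Spectre v2): issue an indirect branch
 * prediction barrier when switching to a different user space process,
 * depending on the chosen mitigation mode.
 */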
static void cond_ibpb(struct task_struct *next)
{
        if (!next || !next->mm)
                return;

        /*
         * Conditional mode: issue the barrier only when a task asked for it
         * via TIF_SPEC_IB (prctl/seccomp).  The decision has to take the
         * previous task into account as well, so the TIF bit is folded into
         * bit 0 of the mm pointer and the combined value is compared against
         * the one recorded at the last user mm switch.
         */
        if (static_branch_likely(&switch_mm_cond_ibpb)) {
                unsigned long prev_mm, next_mm;

                /*
                 * A barrier is needed only when the mm actually changes and
                 * at least one of the two tasks (previous or next) has
                 * requested IB protection.  Switching between tasks of the
                 * same process never needs a barrier.
                 */
                next_mm = mm_mangle_tif_spec_ib(next);
                prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_ibpb);

                if (next_mm != prev_mm &&
                    (next_mm | prev_mm) & LAST_USER_MM_IBPB)
                        indirect_branch_prediction_barrier();

                this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, next_mm);
        }

        if (static_branch_unlikely(&switch_mm_always_ibpb)) {
                /*
                 * Always-on mode: issue the barrier whenever the mm differs
                 * from the last user mm that ran on this CPU.
                 */
                if (this_cpu_read(cpu_tlbstate.last_user_mm) != next->mm) {
                        indirect_branch_prediction_barrier();
                        this_cpu_write(cpu_tlbstate.last_user_mm, next->mm);
                }
        }
}

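/*
 * switch_mm_irqs_off() does the real work of switching the loaded mm and
 * CR3.  It is also called with prev == NULL and tsk == NULL from leave_mm()
 * and from the lazy-TLB path in flush_tlb_func_common().
 */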
void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
                        struct task_struct *tsk)
{
        struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
        u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
        unsigned cpu = smp_processor_id();
        u64 next_tlb_gen;

        /*
         * NB: the scheduler may call us with prev == next, e.g. when waking
         * from lazy TLB mode, so 'prev' is not used below; the authoritative
         * state is cpu_tlbstate.loaded_mm (real_prev).
         *
         * We don't want flush_tlb_func_*() to run concurrently with us, so
         * interrupts must be disabled.
         */
        if (IS_ENABLED(CONFIG_PROVE_LOCKING))
                WARN_ON_ONCE(!irqs_disabled());

        /*
         * Verify that CR3 is what we think it is.  This catches code that
         * switches page tables behind our back.  Only checked with
         * CONFIG_DEBUG_VM=y because __read_cr3() isn't free.
         */
#ifdef CONFIG_DEBUG_VM
        if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) {
                /*
                 * CR3 is unexpectedly stale.  Rather than crashing, try to
                 * recover with a global flush to minimize the chance of
                 * running on stale translations.
                 */
                __flush_tlb_all();
        }
#endif
        this_cpu_write(cpu_tlbstate.is_lazy, false);

        if (real_prev == next) {
                VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
                           next->context.ctx_id);

                /*
                 * Even in lazy TLB mode this CPU should stay set in
                 * mm_cpumask(next); repair and warn if it is not.
                 */
                if (WARN_ON_ONCE(real_prev != &init_mm &&
                                 !cpumask_test_cpu(cpu, mm_cpumask(next))))
                        cpumask_set_cpu(cpu, mm_cpumask(next));

                return;
        } else {
                u16 new_asid;
                bool need_flush;

                /*
                 * Avoid user/user BTB poisoning by flushing the branch
                 * predictor when switching between processes.
                 */
                cond_ibpb(tsk);

                if (IS_ENABLED(CONFIG_VMAP_STACK)) {
                        /*
                         * If our current stack is in vmalloc space and isn't
                         * mapped in the new pgd, we'll double-fault.  Forcibly
                         * map it.
                         */
                        sync_current_stack_to_mm(next);
                }

                /* Stop remote flushes for the previous mm. */
                VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(real_prev)) &&
                                real_prev != &init_mm);
                cpumask_clear_cpu(cpu, mm_cpumask(real_prev));

                /*
                 * Start remote flushes for the next mm and then read its
                 * tlb_gen.
                 */
                cpumask_set_cpu(cpu, mm_cpumask(next));
                next_tlb_gen = atomic64_read(&next->context.tlb_gen);

                choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);

                /* Publish that a CR3 switch is in progress. */
                this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING);
                barrier();

                if (need_flush) {
                        this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
                        this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
                        load_new_mm_cr3(next->pgd, new_asid, true);

                        trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
                } else {
                        /* The chosen ASID is already up to date. */
                        load_new_mm_cr3(next->pgd, new_asid, false);

                        trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
                }

                /* Make sure we write CR3 before loaded_mm. */
                barrier();

                this_cpu_write(cpu_tlbstate.loaded_mm, next);
                this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
        }

        load_mm_cr4(next);
        switch_ldt(real_prev, next);
}

/*
 * enter_lazy_tlb() is a hint from the scheduler that we are entering a
 * kernel thread or another context without an mm.  We either stay lazy
 * and keep the current page tables, or switch to init_mm right away.
 * The scheduler tells us we're back to a real mm by calling
 * switch_mm_irqs_off().
 */
void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
        if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm)
                return;

        if (tlb_defer_switch_to_init_mm()) {
                /*
                 * Defer the switch: mark this CPU lazy and keep the current
                 * page tables.  A later remote flush targeting this mm will
                 * switch us to init_mm instead of doing the flush.
                 */
                this_cpu_write(cpu_tlbstate.is_lazy, true);
        } else {
                switch_mm(NULL, &init_mm, NULL);
        }
}

/*
 * Call this when (re)initializing a CPU.  It resets cpu_tlbstate to a
 * known state and forces a TLB flush so that no stale ASID contents
 * survive, e.g. across CPU hotplug.
 */
void initialize_tlbstate_and_flush(void)
{
        int i;
        struct mm_struct *mm = this_cpu_read(cpu_tlbstate.loaded_mm);
        u64 tlb_gen = atomic64_read(&init_mm.context.tlb_gen);
        unsigned long cr3 = __read_cr3();

        /* Assert that CR3 already references the right mm. */
        WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd));

        /* Assert that CR4.PCIDE is set if the CPU supports PCID. */
        WARN_ON(boot_cpu_has(X86_FEATURE_PCID) &&
                !(cr4_read_shadow() & X86_CR4_PCIDE));

        /* Force ASID 0 and force a TLB flush. */
        write_cr3(build_cr3(mm->pgd, 0));

        /* Reinitialize tlbstate. */
        this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, LAST_USER_MM_IBPB);
        this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
        this_cpu_write(cpu_tlbstate.next_asid, 1);
        this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
        this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, tlb_gen);

        for (i = 1; i < TLB_NR_DYN_ASIDS; i++)
                this_cpu_write(cpu_tlbstate.ctxs[i].ctx_id, 0);
}

/*
 * flush_tlb_func_common() handles both local and remote flush requests.
 * Its memory ordering requirement is that any TLB fills that happen after
 * the flush are ordered after the read of the mm's tlb_gen; no explicit
 * barriers are needed because x86 flush operations are serializing and
 * atomic64_read() won't be reordered by the compiler.
 */
static void flush_tlb_func_common(const struct flush_tlb_info *f,
                                  bool local, enum tlb_flush_reason reason)
{
        /*
         * Three tlb_gen values matter here:
         *
         * - mm_tlb_gen:     the latest generation of the mm.
         * - local_tlb_gen:  the generation this CPU has already caught up to.
         * - f->new_tlb_gen: the generation the flush requester wants us to
         *                   catch up to.
         */
        struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
        u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
        u64 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);
        u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen);

        /* This code cannot presently handle being reentered. */
        VM_WARN_ON(!irqs_disabled());

        if (unlikely(loaded_mm == &init_mm))
                return;

        VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) !=
                   loaded_mm->context.ctx_id);

        if (this_cpu_read(cpu_tlbstate.is_lazy)) {
                /*
                 * We're in lazy TLB mode.  Rather than flushing, just switch
                 * to init_mm; the next real switch back to this mm will see
                 * the stale tlb_gen and flush then.
                 */
                switch_mm_irqs_off(NULL, &init_mm, NULL);
                return;
        }

        if (unlikely(local_tlb_gen == mm_tlb_gen)) {
                /*
                 * Nothing to do: we're already up to date.  This can happen
                 * when two concurrent flushes race and the first one handled
                 * catches us all the way up.
                 */
                trace_tlb_flush(reason, 0);
                return;
        }

        WARN_ON_ONCE(local_tlb_gen > mm_tlb_gen);
        WARN_ON_ONCE(f->new_tlb_gen > mm_tlb_gen);

        /*
         * A partial flush of the requested range is sufficient only if it
         * advances us by exactly one generation and that generation is the
         * latest one (new_tlb_gen == local_tlb_gen + 1 == mm_tlb_gen).
         * Otherwise we may have missed intermediate flush requests, so fall
         * back to a full flush, which catches everything up at once.
         */
        if (f->end != TLB_FLUSH_ALL &&
            f->new_tlb_gen == local_tlb_gen + 1 &&
            f->new_tlb_gen == mm_tlb_gen) {
                /* Partial flush. */
                unsigned long nr_invalidate = (f->end - f->start) >> f->stride_shift;
                unsigned long addr = f->start;

                while (addr < f->end) {
                        __flush_tlb_one_user(addr);
                        addr += 1UL << f->stride_shift;
                }
                if (local)
                        count_vm_tlb_events(NR_TLB_LOCAL_FLUSH_ONE, nr_invalidate);
                trace_tlb_flush(reason, nr_invalidate);
        } else {
                /* Full flush. */
                local_flush_tlb();
                if (local)
                        count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
                trace_tlb_flush(reason, TLB_FLUSH_ALL);
        }

        /* Both paths above bring us up to date with mm_tlb_gen. */
        this_cpu_write(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen, mm_tlb_gen);
}

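/* Flush callback run on the CPU that initiated the flush. */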
static void flush_tlb_func_local(void *info, enum tlb_flush_reason reason)
{
        const struct flush_tlb_info *f = info;

        flush_tlb_func_common(f, true, reason);
}

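/* IPI callback run on remote CPUs. */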
static void flush_tlb_func_remote(void *info)
{
        const struct flush_tlb_info *f = info;

        inc_irq_stat(irq_tlb_count);

        /* The target mm may have been switched out since the IPI was sent. */
        if (f->mm && f->mm != this_cpu_read(cpu_tlbstate.loaded_mm))
                return;

        count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
        flush_tlb_func_common(f, false, TLB_REMOTE_SHOOTDOWN);
}

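/*
 * Send a flush IPI to every CPU in @cpumask and wait for
 * flush_tlb_func_remote() to finish on all of them.
 */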
void native_flush_tlb_others(const struct cpumask *cpumask,
                             const struct flush_tlb_info *info)
{
        count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
        if (info->end == TLB_FLUSH_ALL)
                trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL);
        else
                trace_tlb_flush(TLB_REMOTE_SEND_IPI,
                                (info->end - info->start) >> PAGE_SHIFT);

        smp_call_function_many(cpumask, flush_tlb_func_remote,
                               (void *)info, 1);
}

/*
 * Ceiling, in pages, above which a ranged flush falls back to flushing the
 * whole TLB instead of flushing one page at a time.  Tunable at runtime via
 * the tlb_single_page_flush_ceiling debugfs file below.
 */
static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;

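/*
 * Flush a range of user addresses for @mm: locally if the mm is loaded on
 * this CPU, and via IPIs on every other CPU in mm_cpumask(mm).
 */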
void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
                        unsigned long end, unsigned int stride_shift,
                        bool freed_tables)
{
        int cpu;

        struct flush_tlb_info info __aligned(SMP_CACHE_BYTES) = {
                .mm = mm,
                .stride_shift = stride_shift,
        };

        cpu = get_cpu();

        /* This is also a barrier that synchronizes with switch_mm(). */
        info.new_tlb_gen = inc_mm_tlb_gen(mm);

        /* Flush only the requested range if it is small enough. */
        if ((end != TLB_FLUSH_ALL) &&
            ((end - start) >> stride_shift) <= tlb_single_page_flush_ceiling) {
                info.start = start;
                info.end = end;
        } else {
                info.start = 0UL;
                info.end = TLB_FLUSH_ALL;
        }

        if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) {
                VM_WARN_ON(irqs_disabled());
                local_irq_disable();
                flush_tlb_func_local(&info, TLB_LOCAL_MM_SHOOTDOWN);
                local_irq_enable();
        }

        if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids)
                flush_tlb_others(mm_cpumask(mm), &info);

        put_cpu();
}

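/* Flush all mappings, including global ones, on the calling CPU. */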
static void do_flush_tlb_all(void *info)
{
        count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
        __flush_tlb_all();
}

void flush_tlb_all(void)
{
        count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
        on_each_cpu(do_flush_tlb_all, NULL, 1);
}

static void do_kernel_range_flush(void *info)
{
        struct flush_tlb_info *f = info;
        unsigned long addr;

        /* Flush the range one page at a time. */
        for (addr = f->start; addr < f->end; addr += PAGE_SIZE)
                __flush_tlb_one_kernel(addr);
}

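/*
 * Flush a range of kernel addresses on all CPUs, falling back to a full
 * flush for large ranges (mirroring the user-space heuristic above).
 */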
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
        if (end == TLB_FLUSH_ALL ||
            (end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) {
                on_each_cpu(do_flush_tlb_all, NULL, 1);
        } else {
                struct flush_tlb_info info;

                info.start = start;
                info.end = end;
                on_each_cpu(do_kernel_range_flush, &info, 1);
        }
}

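/*
 * Issue a full TLB flush on every CPU recorded in @batch->cpumask: locally
 * if this CPU is in the mask, via IPIs for the others, then clear the mask.
 */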
void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
{
        struct flush_tlb_info info = {
                .mm = NULL,
                .start = 0UL,
                .end = TLB_FLUSH_ALL,
        };

        int cpu = get_cpu();

        if (cpumask_test_cpu(cpu, &batch->cpumask)) {
                VM_WARN_ON(irqs_disabled());
                local_irq_disable();
                flush_tlb_func_local(&info, TLB_LOCAL_SHOOTDOWN);
                local_irq_enable();
        }

        if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids)
                flush_tlb_others(&batch->cpumask, &info);

        cpumask_clear(&batch->cpumask);

        put_cpu();
}

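/*
 * debugfs interface (in the arch debugfs directory) for reading and tuning
 * tlb_single_page_flush_ceiling at runtime.
 */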
static ssize_t tlbflush_read_file(struct file *file, char __user *user_buf,
                                  size_t count, loff_t *ppos)
{
        char buf[32];
        unsigned int len;

        len = sprintf(buf, "%lu\n", tlb_single_page_flush_ceiling);
        return simple_read_from_buffer(user_buf, count, ppos, buf, len);
}

static ssize_t tlbflush_write_file(struct file *file,
                const char __user *user_buf, size_t count, loff_t *ppos)
{
        char buf[32];
        ssize_t len;
        int ceiling;

        len = min(count, sizeof(buf) - 1);
        if (copy_from_user(buf, user_buf, len))
                return -EFAULT;

        buf[len] = '\0';
        if (kstrtoint(buf, 0, &ceiling))
                return -EINVAL;

        if (ceiling < 0)
                return -EINVAL;

        tlb_single_page_flush_ceiling = ceiling;
        return count;
}

static const struct file_operations fops_tlbflush = {
        .read = tlbflush_read_file,
        .write = tlbflush_write_file,
        .llseek = default_llseek,
};

static int __init create_tlb_single_page_flush_ceiling(void)
{
        debugfs_create_file("tlb_single_page_flush_ceiling", S_IRUSR | S_IWUSR,
                            arch_debugfs_dir, NULL, &fops_tlbflush);
        return 0;
}
late_initcall(create_tlb_single_page_flush_ceiling);