#include <linux/init.h>

#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/export.h>
#include <linux/cpu.h>
#include <linux/debugfs.h>

#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/nospec-branch.h>
#include <asm/cache.h>
#include <asm/apic.h>
#include <asm/uv/uv.h>

#include "mm_internal.h"
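
/*
 * This file implements x86 TLB maintenance: mm switching
 * (switch_mm_irqs_off() and friends), lazy-TLB handling, and the local
 * and remote flush paths.
 *
 * Each CPU keeps TLB_NR_DYN_ASIDS software ASID slots in cpu_tlbstate.
 * Every slot records which mm it last held (ctx_id) and which flush
 * generation of that mm it is up to date with (tlb_gen), so that
 * switching back to a recently used mm can reuse the slot's PCID
 * without a full flush.
 */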

/*
 * Bit 0 of the mm pointer stashed in cpu_tlbstate.last_user_mm_ibpb
 * encodes whether the last user task had TIF_SPEC_IB set.  See
 * mm_mangle_tif_spec_ib() and cond_ibpb() below.
 */
#define LAST_USER_MM_IBPB	0x1UL

/*
 * We get here when we need a TLB invalidation but cannot (or do not
 * want to) go and flush every context.  Instead, clear the recorded
 * ctx_id of every ASID slot other than the one currently loaded, which
 * forces a flush the next time any of those contexts is switched in.
 */
static void clear_asid_other(void)
{
	u16 asid;

	/* invalidate_other is only expected to be set when PTI is in use. */
	if (!static_cpu_has(X86_FEATURE_PTI)) {
		WARN_ON_ONCE(1);
		return;
	}

	for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
		/* The currently loaded ASID does not need this treatment. */
		if (asid == this_cpu_read(cpu_tlbstate.loaded_mm_asid))
			continue;
		/*
		 * Zap the recorded ctx_id so that the next switch to this
		 * ASID is forced to flush.
		 */
		this_cpu_write(cpu_tlbstate.ctxs[asid].ctx_id, 0);
	}
	this_cpu_write(cpu_tlbstate.invalidate_other, false);
}
atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);

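/*
 * Pick an ASID slot on this CPU for @next.  Reuse an existing slot if it
 * already holds this mm, flushing only if the slot's tlb_gen is stale;
 * otherwise grab the next slot round-robin and request a full flush.
 */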
static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
			    u16 *new_asid, bool *need_flush)
{
	u16 asid;

	if (!static_cpu_has(X86_FEATURE_PCID)) {
		*new_asid = 0;
		*need_flush = true;
		return;
	}

	if (this_cpu_read(cpu_tlbstate.invalidate_other))
		clear_asid_other();

	for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
		if (this_cpu_read(cpu_tlbstate.ctxs[asid].ctx_id) !=
		    next->context.ctx_id)
			continue;

		*new_asid = asid;
		*need_flush = (this_cpu_read(cpu_tlbstate.ctxs[asid].tlb_gen) <
			       next_tlb_gen);
		return;
	}

	/*
	 * This mm does not currently own an ASID slot on this CPU.
	 * Allocate the next one, wrapping around, and flush it.
	 */
	*new_asid = this_cpu_add_return(cpu_tlbstate.next_asid, 1) - 1;
	if (*new_asid >= TLB_NR_DYN_ASIDS) {
		*new_asid = 0;
		this_cpu_write(cpu_tlbstate.next_asid, 1);
	}
	*need_flush = true;
}

static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush)
{
	unsigned long new_mm_cr3;

	if (need_flush) {
		invalidate_user_asid(new_asid);
		new_mm_cr3 = build_cr3(pgdir, new_asid);
	} else {
		new_mm_cr3 = build_cr3_noflush(pgdir, new_asid);
	}

	/*
	 * Caution: callers rely on write_cr3() being a serializing
	 * instruction that orders subsequent TLB fills against the
	 * mm_cpumask updates done around the switch.
	 */
	write_cr3(new_mm_cr3);
}

void leave_mm(int cpu)
{
	struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);

	/*
	 * It's plausible that we're in lazy TLB mode while our mm is
	 * init_mm.  If so, there are no user TLB entries to worry about
	 * and there is nothing to do.
	 */
	if (loaded_mm == &init_mm)
		return;

	/* Warn if we're not lazy. */
	WARN_ON(!this_cpu_read(cpu_tlbstate.is_lazy));

	switch_mm(NULL, &init_mm, NULL);
}
EXPORT_SYMBOL_GPL(leave_mm);
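
/*
 * switch_mm() is the interrupt-safe wrapper; the real work is done in
 * switch_mm_irqs_off().
 */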
void switch_mm(struct mm_struct *prev, struct mm_struct *next,
	       struct task_struct *tsk)
{
	unsigned long flags;

	local_irq_save(flags);
	switch_mm_irqs_off(prev, next, tsk);
	local_irq_restore(flags);
}

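/*
 * With CONFIG_VMAP_STACK the kernel stack lives in the vmalloc area, so
 * before switching CR3 to a new mm we must make sure that mm's page
 * tables map the current stack; otherwise the first stack access after
 * the switch would fault with no way to handle it.
 */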
static void sync_current_stack_to_mm(struct mm_struct *mm)
{
	unsigned long sp = current_stack_pointer;
	pgd_t *pgd = pgd_offset(mm, sp);

	if (pgtable_l5_enabled()) {
		if (unlikely(pgd_none(*pgd))) {
			pgd_t *pgd_ref = pgd_offset_k(sp);

			set_pgd(pgd, *pgd_ref);
		}
	} else {
		/*
		 * With 4-level paging the p4d level is folded into the
		 * pgd, so the "pgd" entry is really a p4d: check and sync
		 * at the p4d level instead.
		 */
		p4d_t *p4d = p4d_offset(pgd, sp);

		if (unlikely(p4d_none(*p4d))) {
			pgd_t *pgd_ref = pgd_offset_k(sp);
			p4d_t *p4d_ref = p4d_offset(pgd_ref, sp);

			set_p4d(p4d, *p4d_ref);
		}
	}
}

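/*
 * Fold the task's TIF_SPEC_IB state into bit 0 of its mm pointer.  The
 * result is compared against cpu_tlbstate.last_user_mm_ibpb in
 * cond_ibpb() to decide whether an IBPB is required.
 */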
static inline unsigned long mm_mangle_tif_spec_ib(struct task_struct *next)
{
	unsigned long next_tif = task_thread_info(next)->flags;
	unsigned long ibpb = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_IBPB;

	return (unsigned long)next->mm | ibpb;
}

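/*
 * Issue an indirect branch prediction barrier on context switch when
 * required, so that branch predictor state owned by the previous user
 * task cannot be abused against the incoming one.
 */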
static void cond_ibpb(struct task_struct *next)
{
	/* Switches to kernel threads never need a barrier. */
	if (!next || !next->mm)
		return;

	/*
	 * Both the conditional and the always-on IBPB modes use the mm
	 * pointer (rather than ctx_id) to detect whether we are switching
	 * between tasks of the same process, in which case the barrier
	 * can be skipped.
	 */
	if (static_branch_likely(&switch_mm_cond_ibpb)) {
		unsigned long prev_mm, next_mm;

		/*
		 * Conditional mode: issue the barrier only if the incoming
		 * or the outgoing task requested IBPB protection
		 * (TIF_SPEC_IB) and the two tasks do not share an mm.  The
		 * TIF_SPEC_IB bit is folded into bit 0 of the mm pointer
		 * stored in cpu_tlbstate.last_user_mm_ibpb, so a single
		 * compare covers both the "same process" and the "neither
		 * side wants IBPB" cases.
		 */
		next_mm = mm_mangle_tif_spec_ib(next);
		prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_ibpb);

		/*
		 * Issue IBPB only if the mm's are different and one or
		 * both have the IBPB bit set.
		 */
		if (next_mm != prev_mm &&
		    (next_mm | prev_mm) & LAST_USER_MM_IBPB)
			indirect_branch_prediction_barrier();

		this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, next_mm);
	}

	if (static_branch_unlikely(&switch_mm_always_ibpb)) {
		/*
		 * Always-on mode: only skip the barrier when switching
		 * between tasks of the same process.
		 */
		if (this_cpu_read(cpu_tlbstate.last_user_mm) != next->mm) {
			indirect_branch_prediction_barrier();
			this_cpu_write(cpu_tlbstate.last_user_mm, next->mm);
		}
	}
}

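/*
 * Switch the CPU to a new mm.  Interrupts must be disabled.  This updates
 * mm_cpumask, picks an ASID via choose_new_asid(), loads CR3 (flushing
 * only when the chosen ASID's TLB contents are stale), and finally
 * updates cpu_tlbstate and the per-CPU CR4/LDT state.
 */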
void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
			struct task_struct *tsk)
{
	struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
	u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
	bool was_lazy = this_cpu_read(cpu_tlbstate.is_lazy);
	unsigned cpu = smp_processor_id();
	u64 next_tlb_gen;
	bool need_flush;
	u16 new_asid;

	/*
	 * NB: The scheduler will call us with prev == next when switching
	 * from lazy TLB mode to normal mode if active_mm isn't changing.
	 * When this happens, we don't assume that CR3 (and hence
	 * cpu_tlbstate.loaded_mm) matches next.
	 *
	 * NB: leave_mm() calls us with prev == NULL and tsk == NULL.
	 */

	/* We don't want flush_tlb_func_*() to run concurrently with us. */
	if (IS_ENABLED(CONFIG_PROVE_LOCKING))
		WARN_ON_ONCE(!irqs_disabled());

	/*
	 * Verify that CR3 is what we think it is.  This will catch
	 * hypothetical buggy code that directly switches to swapper_pg_dir
	 * without going through leave_mm() / switch_mm_irqs_off() or that
	 * does something like write_cr3(read_cr3_pa()).
	 *
	 * Only do this check if CONFIG_DEBUG_VM=y because __read_cr3()
	 * isn't free.
	 */
#ifdef CONFIG_DEBUG_VM
	if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) {
		/*
		 * If we were to BUG here, we'd be very likely to kill
		 * the system so hard that we don't see the call trace.
		 * Try to recover instead by ignoring the error and doing
		 * a global flush to minimize the chance of corruption.
		 */
		__flush_tlb_all();
	}
#endif
	this_cpu_write(cpu_tlbstate.is_lazy, false);

	/*
	 * The membarrier system call requires a full memory barrier and
	 * core serialization before returning to user-space, after
	 * storing to rq->curr.  Writing to CR3 provides that full memory
	 * barrier and core serializing instruction.
	 */
	if (real_prev == next) {
		VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
			   next->context.ctx_id);

		/*
		 * Even in lazy TLB mode, the CPU should stay set in the
		 * mm_cpumask.  The TLB shootdown code can figure out from
		 * cpu_tlbstate.is_lazy whether or not to send an IPI.
		 */
		if (WARN_ON_ONCE(real_prev != &init_mm &&
				 !cpumask_test_cpu(cpu, mm_cpumask(next))))
			cpumask_set_cpu(cpu, mm_cpumask(next));

		/*
		 * If the CPU is not in lazy TLB mode, we are just switching
		 * from one thread in a process to another thread in the
		 * same process.  No TLB flush required.
		 */
		if (!was_lazy)
			return;

		/*
		 * Read the tlb_gen to check whether a flush is needed.
		 * If the TLB is up to date, just use it.  The barrier
		 * synchronizes with the tlb_gen increment in the TLB
		 * shootdown code.
		 */
		smp_mb();
		next_tlb_gen = atomic64_read(&next->context.tlb_gen);
		if (this_cpu_read(cpu_tlbstate.ctxs[prev_asid].tlb_gen) ==
				next_tlb_gen)
			return;

		/*
		 * TLB contents went out of date while we were in lazy
		 * mode.  Fall through to the TLB switching code below.
		 */
		new_asid = prev_asid;
		need_flush = true;
	} else {
		/*
		 * Avoid user/user BTB poisoning by flushing the branch
		 * predictor when switching between processes.  This stops
		 * one process from doing Spectre-v2 attacks on another.
		 */
		cond_ibpb(tsk);

		if (IS_ENABLED(CONFIG_VMAP_STACK)) {
			/*
			 * If our current stack is in vmalloc space and isn't
			 * mapped in the new pgd, we'll double-fault.  Detect
			 * this here and fix it up if needed.
			 */
			sync_current_stack_to_mm(next);
		}

		/*
		 * Stop remote flushes for the previous mm.
		 * Skip kernel threads; we never send init_mm TLB flushing
		 * IPIs, but the bitmap manipulation can cause cache line
		 * contention.
		 */
		if (real_prev != &init_mm) {
			VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu,
						mm_cpumask(real_prev)));
			cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
		}

		/*
		 * Start remote flushes and then read tlb_gen.
		 */
		if (next != &init_mm)
			cpumask_set_cpu(cpu, mm_cpumask(next));
		next_tlb_gen = atomic64_read(&next->context.tlb_gen);

		choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);

		/* Let nmi_uaccess_okay() know that we're changing CR3. */
		this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING);
		barrier();
	}

	if (need_flush) {
		this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
		this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
		load_new_mm_cr3(next->pgd, new_asid, true);

		/*
		 * NB: This gets called via leave_mm() in the idle path
		 * where RCU functions differently.  Tracing normally
		 * uses RCU, so we have to use the _rcuidle variant of
		 * the tracepoint here.
		 */
		trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
	} else {
		/* The new ASID is already up to date. */
		load_new_mm_cr3(next->pgd, new_asid, false);

		/* See above wrt _rcuidle. */
		trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
	}

	/* Make sure we write CR3 before loaded_mm. */
	barrier();

	this_cpu_write(cpu_tlbstate.loaded_mm, next);
	this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);

	if (next != real_prev) {
		load_mm_cr4_irqsoff(next);
		switch_ldt(real_prev, next);
	}
}

/*
 * enter_lazy_tlb() is a hint from the scheduler that we are entering a
 * kernel thread or some other context without an mm.  Rather than
 * switching page tables, just mark this CPU as lazy: remote flush IPIs
 * for the still-loaded mm can then be skipped, and switch_mm_irqs_off()
 * will catch up (flushing if needed) when a real mm is switched back in.
 */
void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
	if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm)
		return;

	this_cpu_write(cpu_tlbstate.is_lazy, true);
}

/*
 * Call this when reinitializing a CPU.  It fixes the following potential
 * problems:
 *
 * - The ASID changed from what cpu_tlbstate thinks it is (most likely
 *   because the CPU was taken down and came back up with CR3's PCID
 *   bits clear.  CPU hotplug can do this).
 *
 * - The TLB contains junk in slots corresponding to inactive ASIDs.
 *
 * - The CPU went so far out to lunch that it may have missed a TLB
 *   flush.
 */
void initialize_tlbstate_and_flush(void)
{
	int i;
	struct mm_struct *mm = this_cpu_read(cpu_tlbstate.loaded_mm);
	u64 tlb_gen = atomic64_read(&init_mm.context.tlb_gen);
	unsigned long cr3 = __read_cr3();

	/* Assert that CR3 already references the right mm. */
	WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd));

	/*
	 * Assert that CR4.PCIDE is set if needed.  (CR4.PCIDE initialization
	 * doesn't work like other CR4 bits because it can only be set from
	 * long mode.)
	 */
	WARN_ON(boot_cpu_has(X86_FEATURE_PCID) &&
		!(cr4_read_shadow() & X86_CR4_PCIDE));

	/* Force ASID 0 and force a TLB flush. */
	write_cr3(build_cr3(mm->pgd, 0));

	/* Reinitialize tlbstate. */
	this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, LAST_USER_MM_IBPB);
	this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
	this_cpu_write(cpu_tlbstate.next_asid, 1);
	this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
	this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, tlb_gen);

	for (i = 1; i < TLB_NR_DYN_ASIDS; i++)
		this_cpu_write(cpu_tlbstate.ctxs[i].ctx_id, 0);
}

/*
 * flush_tlb_func_common()'s memory ordering requirement is that any
 * TLB fills that happen after we flush the TLB are ordered after we
 * read active_mm's tlb_gen.  We don't need any explicit barriers
 * because all x86 flush operations are serializing.
 */
static void flush_tlb_func_common(const struct flush_tlb_info *f,
				  bool local, enum tlb_flush_reason reason)
{
	/*
	 * We have three different tlb_gen values in here.  They are:
	 *
	 * - mm_tlb_gen:     the latest generation.
	 * - local_tlb_gen:  the generation that this CPU has already caught
	 *                   up to.
	 * - f->new_tlb_gen: the generation that the requester of the flush
	 *                   wants us to catch up to.
	 */
	struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
	u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
	u64 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);
	u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen);

	/* This code cannot presently handle being reentered. */
	VM_WARN_ON(!irqs_disabled());

	if (unlikely(loaded_mm == &init_mm))
		return;

	VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) !=
		   loaded_mm->context.ctx_id);

	if (this_cpu_read(cpu_tlbstate.is_lazy)) {
		/*
		 * We're in lazy mode.  We need to at least flush our
		 * paging-structure cache to avoid speculatively reading
		 * garbage into our TLB.  Since switching to init_mm is barely
		 * slower than a minimal flush, just switch to init_mm.
		 *
		 * This should be rare, with native_flush_tlb_others()
		 * skipping IPIs to lazy TLB mode CPUs.
		 */
		switch_mm_irqs_off(NULL, &init_mm, NULL);
		return;
	}

	if (unlikely(local_tlb_gen == mm_tlb_gen)) {
		/*
		 * There's nothing to do: we're already up to date.  This can
		 * happen if two concurrent flushes happen -- the first flush
		 * to be handled can catch us all the way up, leaving no work
		 * for the second flush.
		 */
		trace_tlb_flush(reason, 0);
		return;
	}

	WARN_ON_ONCE(local_tlb_gen > mm_tlb_gen);
	WARN_ON_ONCE(f->new_tlb_gen > mm_tlb_gen);

	/*
	 * If we get here, the TLB is out of date and we are going to bring
	 * it up to mm_tlb_gen.  The only question is whether a full or a
	 * partial flush suffices.  A partial (per-address) flush is done
	 * only if the request covers a bounded range and:
	 *
	 * 1. f->new_tlb_gen == local_tlb_gen + 1, so this single request
	 *    accounts for the entire gap between what we have and what is
	 *    being asked for, and
	 *
	 * 2. f->new_tlb_gen == mm_tlb_gen, so catching up to the request
	 *    also catches us up to the latest generation and no further
	 *    flush is pending behind it.
	 *
	 * Otherwise fall back to a full flush, which covers any combination
	 * of outstanding generations.
	 */
	if (f->end != TLB_FLUSH_ALL &&
	    f->new_tlb_gen == local_tlb_gen + 1 &&
	    f->new_tlb_gen == mm_tlb_gen) {
		/* Partial flush */
		unsigned long nr_invalidate = (f->end - f->start) >> f->stride_shift;
		unsigned long addr = f->start;

		while (addr < f->end) {
			__flush_tlb_one_user(addr);
			addr += 1UL << f->stride_shift;
		}
		if (local)
			count_vm_tlb_events(NR_TLB_LOCAL_FLUSH_ONE, nr_invalidate);
		trace_tlb_flush(reason, nr_invalidate);
	} else {
		/* Full flush. */
		local_flush_tlb();
		if (local)
			count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
		trace_tlb_flush(reason, TLB_FLUSH_ALL);
	}

	/* Both paths above have now caught this CPU up to mm_tlb_gen. */
	this_cpu_write(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen, mm_tlb_gen);
}

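/*
 * flush_tlb_func_local() runs on the CPU that requested the flush (with
 * interrupts disabled); flush_tlb_func_remote() is the SMP call-function
 * target used for cross-CPU shootdowns.
 */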
static void flush_tlb_func_local(const void *info, enum tlb_flush_reason reason)
{
	const struct flush_tlb_info *f = info;

	flush_tlb_func_common(f, true, reason);
}

static void flush_tlb_func_remote(void *info)
{
	const struct flush_tlb_info *f = info;

	inc_irq_stat(irq_tlb_count);

	if (f->mm && f->mm != this_cpu_read(cpu_tlbstate.loaded_mm))
		return;

	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
	flush_tlb_func_common(f, false, TLB_REMOTE_SHOOTDOWN);
}

/* Used by on_each_cpu_cond_mask() to skip CPUs that are in lazy TLB mode. */
static bool tlb_is_not_lazy(int cpu, void *data)
{
	return !per_cpu(cpu_tlbstate.is_lazy, cpu);
}

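/*
 * Kick the CPUs in @cpumask to process @info.  Lazy-TLB CPUs are skipped
 * unless page tables were freed, since they will catch up (or switch to
 * init_mm) on their next context switch.
 */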
void native_flush_tlb_others(const struct cpumask *cpumask,
			     const struct flush_tlb_info *info)
{
	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
	if (info->end == TLB_FLUSH_ALL)
		trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL);
	else
		trace_tlb_flush(TLB_REMOTE_SEND_IPI,
				(info->end - info->start) >> PAGE_SHIFT);

	if (is_uv_system()) {
		/*
		 * UV has its own "Broadcast Assist Unit" mechanism for
		 * distributing flush requests instead of the generic
		 * smp_call_function_many() path.  Let it filter the mask
		 * and fall back to IPIs for whatever it did not handle.
		 */
		cpumask = uv_flush_tlb_others(cpumask, info);
		if (cpumask)
			smp_call_function_many(cpumask, flush_tlb_func_remote,
					       (void *)info, 1);
		return;
	}

	/*
	 * If no page tables were freed, we can skip sending IPIs to
	 * CPUs in lazy TLB mode.  They will flush the CPU themselves
	 * at the next context switch.
	 *
	 * However, if page tables are getting freed, we need to send the
	 * IPI everywhere, to prevent CPUs in lazy TLB mode from tripping
	 * up on the new contents of the tables, while there are still
	 * speculative page walks that can hit those freed page tables.
	 */
	if (info->freed_tables)
		smp_call_function_many(cpumask, flush_tlb_func_remote,
				       (void *)info, 1);
	else
		on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func_remote,
				      (void *)info, 1, GFP_ATOMIC, cpumask);
}

/*
 * Ceiling on the number of pages that will be flushed one by one with
 * INVLPG; ranges larger than this (in units of the flush stride) are
 * promoted to a full TLB flush.  Tunable at runtime via the
 * tlb_single_page_flush_ceiling debugfs file below.
 */
unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;

static DEFINE_PER_CPU_SHARED_ALIGNED(struct flush_tlb_info, flush_tlb_info);

#ifdef CONFIG_DEBUG_VM
static DEFINE_PER_CPU(unsigned int, flush_tlb_info_idx);
#endif

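/*
 * Fill in and return this CPU's flush_tlb_info.  Preemption must be
 * disabled by the caller (see flush_tlb_mm_range() and
 * flush_tlb_kernel_range()), which also keeps the structure alive until
 * put_flush_tlb_info() is called.
 */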
static inline struct flush_tlb_info *get_flush_tlb_info(struct mm_struct *mm,
			unsigned long start, unsigned long end,
			unsigned int stride_shift, bool freed_tables,
			u64 new_tlb_gen)
{
	struct flush_tlb_info *info = this_cpu_ptr(&flush_tlb_info);

#ifdef CONFIG_DEBUG_VM
	/*
	 * Ensure that the following code is non-reentrant and flush_tlb_info
	 * is not overwritten.  This means no TLB flushing is initiated by
	 * interrupt handlers and machine-check exception handlers.
	 */
	BUG_ON(this_cpu_inc_return(flush_tlb_info_idx) != 1);
#endif

	info->start = start;
	info->end = end;
	info->mm = mm;
	info->stride_shift = stride_shift;
	info->freed_tables = freed_tables;
	info->new_tlb_gen = new_tlb_gen;

	return info;
}

static inline void put_flush_tlb_info(void)
{
#ifdef CONFIG_DEBUG_VM
	/* Complete the reentrancy prevention check. */
	barrier();
	this_cpu_dec(flush_tlb_info_idx);
#endif
}

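/*
 * Flush the TLB entries mapping [start, end) of @mm on the local CPU and
 * on every other CPU in mm_cpumask(mm).  @stride_shift gives the spacing
 * between entries (e.g. PAGE_SHIFT for 4k mappings); @freed_tables must
 * be true if page-table pages were freed, so that lazy-TLB CPUs are not
 * skipped.
 */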
void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
				unsigned long end, unsigned int stride_shift,
				bool freed_tables)
{
	struct flush_tlb_info *info;
	u64 new_tlb_gen;
	int cpu;

	cpu = get_cpu();

	/* Should we do a full flush instead of the requested range? */
	if ((end == TLB_FLUSH_ALL) ||
	    ((end - start) >> stride_shift) > tlb_single_page_flush_ceiling) {
		start = 0;
		end = TLB_FLUSH_ALL;
	}

	/* This is also a barrier that synchronizes with switch_mm(). */
	new_tlb_gen = inc_mm_tlb_gen(mm);

	info = get_flush_tlb_info(mm, start, end, stride_shift, freed_tables,
				  new_tlb_gen);

	/* Flush locally, with interrupts off as flush_tlb_func_common() requires. */
	if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) {
		lockdep_assert_irqs_enabled();
		local_irq_disable();
		flush_tlb_func_local(info, TLB_LOCAL_MM_SHOOTDOWN);
		local_irq_enable();
	}

	/* Then on any other CPU that might be running this mm. */
	if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids)
		flush_tlb_others(mm_cpumask(mm), info);

	put_flush_tlb_info();
	put_cpu();
}

static void do_flush_tlb_all(void *info)
{
	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
	__flush_tlb_all();
}

void flush_tlb_all(void)
{
	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
	on_each_cpu(do_flush_tlb_all, NULL, 1);
}

static void do_kernel_range_flush(void *info)
{
	struct flush_tlb_info *f = info;
	unsigned long addr;

	/* Flush the range one page at a time with INVLPG. */
	for (addr = f->start; addr < f->end; addr += PAGE_SIZE)
		__flush_tlb_one_kernel(addr);
}

void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	/* Balance against the user-space flush ceiling, a bit conservatively. */
	if (end == TLB_FLUSH_ALL ||
	    (end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) {
		on_each_cpu(do_flush_tlb_all, NULL, 1);
	} else {
		struct flush_tlb_info *info;

		preempt_disable();
		info = get_flush_tlb_info(NULL, start, end, 0, false, 0);

		on_each_cpu(do_kernel_range_flush, info, 1);

		put_flush_tlb_info();
		preempt_enable();
	}
}

/*
 * arch_tlbbatch_flush() performs a full TLB flush regardless of which mm
 * is active, so the flush_tlb_info it sends is a single fixed "flush
 * everything" descriptor rather than a per-call allocation.
 */
static const struct flush_tlb_info full_flush_tlb_info = {
	.mm = NULL,
	.start = 0,
	.end = TLB_FLUSH_ALL,
};

void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
{
	int cpu = get_cpu();

	if (cpumask_test_cpu(cpu, &batch->cpumask)) {
		lockdep_assert_irqs_enabled();
		local_irq_disable();
		flush_tlb_func_local(&full_flush_tlb_info, TLB_LOCAL_SHOOTDOWN);
		local_irq_enable();
	}

	if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids)
		flush_tlb_others(&batch->cpumask, &full_flush_tlb_info);

	cpumask_clear(&batch->cpumask);

	put_cpu();
}

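/*
 * debugfs knob for tuning tlb_single_page_flush_ceiling at runtime; the
 * file is created under arch_debugfs_dir by the initcall below.
 */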
static ssize_t tlbflush_read_file(struct file *file, char __user *user_buf,
			     size_t count, loff_t *ppos)
{
	char buf[32];
	unsigned int len;

	len = sprintf(buf, "%ld\n", tlb_single_page_flush_ceiling);
	return simple_read_from_buffer(user_buf, count, ppos, buf, len);
}

static ssize_t tlbflush_write_file(struct file *file,
		 const char __user *user_buf, size_t count, loff_t *ppos)
{
	char buf[32];
	ssize_t len;
	int ceiling;

	len = min(count, sizeof(buf) - 1);
	if (copy_from_user(buf, user_buf, len))
		return -EFAULT;

	buf[len] = '\0';
	if (kstrtoint(buf, 0, &ceiling))
		return -EINVAL;

	if (ceiling < 0)
		return -EINVAL;

	tlb_single_page_flush_ceiling = ceiling;
	return count;
}

static const struct file_operations fops_tlbflush = {
	.read = tlbflush_read_file,
	.write = tlbflush_write_file,
	.llseek = default_llseek,
};

static int __init create_tlb_single_page_flush_ceiling(void)
{
	debugfs_create_file("tlb_single_page_flush_ceiling", S_IRUSR | S_IWUSR,
			    arch_debugfs_dir, NULL, &fops_tlbflush);
	return 0;
}
late_initcall(create_tlb_single_page_flush_ceiling);