#include <linux/init.h>

#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/export.h>
#include <linux/cpu.h>
#include <linux/debugfs.h>
#include <linux/sched/smt.h>

#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/nospec-branch.h>
#include <asm/cache.h>
#include <asm/cacheflush.h>
#include <asm/apic.h>
#include <asm/perf_event.h>

#include "mm_internal.h"

#ifdef CONFIG_PARAVIRT
# define STATIC_NOPV
#else
# define STATIC_NOPV                    static
# define __flush_tlb_local              native_flush_tlb_local
# define __flush_tlb_global             native_flush_tlb_global
# define __flush_tlb_one_user(addr)     native_flush_tlb_one_user(addr)
# define __flush_tlb_multi(msk, info)   native_flush_tlb_multi(msk, info)
#endif
/*
 * x86 TLB flushing.
 *
 * This file implements local and cross-CPU TLB invalidation, the per-CPU
 * ASID/PCID bookkeeping used by switch_mm(), and the speculation
 * mitigations (IBPB, L1D flush) applied on context switch.
 */

/*
 * Bits to mangle the TIF_SPEC_* state into the mm pointer which is
 * stored in cpu_tlbstate.last_user_mm_spec.
 */
#define LAST_USER_MM_IBPB	0x1UL
#define LAST_USER_MM_L1D_FLUSH	0x2UL
#define LAST_USER_MM_SPEC_MASK	(LAST_USER_MM_IBPB | LAST_USER_MM_L1D_FLUSH)

/* Bits to set when tlbstate and flush is (re)initialized */
#define LAST_USER_MM_INIT	LAST_USER_MM_IBPB

/*
 * The x86 feature is called PCID (Process Context IDentifier). It is similar
 * to what is traditionally called ASID on the RISC processors.
 *
 * We don't use the traditional ASID implementation, where each process/mm
 * gets its own ASID and flush/restart when we run out of ASID space.
 *
 * Instead we have a small per-cpu array of ASIDs and cache the last few mm's
 * that came by on this CPU, allowing cheaper switch_mm between processes on
 * this CPU.
 *
 * We end up with different spaces for different things. To avoid confusion
 * we use different names for each of them:
 *
 * ASID  - [0, TLB_NR_DYN_ASIDS-1]
 *         the canonical identifier for an mm
 *
 * kPCID - [1, TLB_NR_DYN_ASIDS]
 *         the value we write into the PCID part of CR3; corresponds to the
 *         ASID + 1, because PCID 0 is special.
 *
 * uPCID - the PTI userspace twin of a kPCID: the kPCID with the
 *         X86_CR3_PTI_PCID_USER_BIT set. For KPTI each mm has two address
 *         spaces and thus needs two PCIDs.
 */

/* There are 12 bits of space for ASIDs in CR3 */
#define CR3_HW_ASID_BITS		12

/*
 * When enabled, PAGE_TABLE_ISOLATION consumes a single bit for
 * user/kernel switches
 */
#ifdef CONFIG_PAGE_TABLE_ISOLATION
# define PTI_CONSUMED_PCID_BITS	1
#else
# define PTI_CONSUMED_PCID_BITS	0
#endif

#define CR3_AVAIL_PCID_BITS (X86_CR3_PCID_BITS - PTI_CONSUMED_PCID_BITS)

/*
 * ASIDs are zero-based: 0..MAX_ASID_AVAILABLE are valid. -1 below to account
 * for them being zero-based. Another -1 is because PCID 0 is reserved for
 * use by non-PCID-aware users, so decrement the count by one as well.
 */
#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_PCID_BITS) - 2)

/*
 * Given @asid, compute kPCID
 */
static inline u16 kern_pcid(u16 asid)
{
        VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);

#ifdef CONFIG_PAGE_TABLE_ISOLATION
        /*
         * Make sure that the dynamic ASID space does not conflict with the
         * bit we are using to switch between user and kernel ASIDs.
         */
        BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_PCID_USER_BIT));

        /*
         * The ASID being passed in here should have respected the
         * MAX_ASID_AVAILABLE and thus never have the switch bit set.
         */
        VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_PCID_USER_BIT));
#endif
        /*
         * The dynamically-assigned ASIDs that get passed in are small
         * (<TLB_NR_DYN_ASIDS).  They never have the high switch bit set,
         * so do not bother to clear it.
         *
         * If PCID is on, ASID-aware code paths put the ASID+1 into the
         * PCID bits.  This serves two purposes.  It prevents a nasty
         * situation in which PCID-unaware code saves CR3, loads some other
         * value (with PCID == 0), and then restores CR3, thus corrupting
         * the TLB for ASID 0 if the saved ASID was nonzero.  It also means
         * that any bugs involving loading a PCID-enabled CR3 with
         * CR4.PCIDE off will trigger deterministically.
         */
        return asid + 1;
}

/*
 * Given @asid, compute uPCID
 */
static inline u16 user_pcid(u16 asid)
{
        u16 ret = kern_pcid(asid);
#ifdef CONFIG_PAGE_TABLE_ISOLATION
        ret |= 1 << X86_CR3_PTI_PCID_USER_BIT;
#endif
        return ret;
}

static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
{
        if (static_cpu_has(X86_FEATURE_PCID)) {
                return __sme_pa(pgd) | kern_pcid(asid);
        } else {
                VM_WARN_ON_ONCE(asid != 0);
                return __sme_pa(pgd);
        }
}

static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
{
        VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
        /*
         * Use boot_cpu_has() instead of this_cpu_has() as this function
         * might be called during early boot. This should work even after
         * boot because all CPUs have the same capabilities:
         */
        VM_WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_PCID));
        return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH;
}
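
/*
 * Illustrative sketch, not part of the build: how an ASID maps onto the
 * kernel and user PCIDs and the final CR3 values. The concrete numbers
 * assume X86_CR3_PTI_PCID_USER_BIT == 11 and CR3_NOFLUSH == 1UL << 63,
 * matching the definitions this file is compiled against.
 */
#if 0
static void cr3_layout_example(struct mm_struct *mm)
{
        u16 asid  = 0;                          /* first dynamic ASID       */
        u16 kpcid = kern_pcid(asid);            /* 0 + 1 == 1               */
        u16 upcid = user_pcid(asid);            /* 1 | (1 << 11) == 2049    */

        /* Low 12 bits of CR3 carry the PCID, the rest the pgd address: */
        unsigned long cr3_flush   = build_cr3(mm->pgd, asid);
        /* Bit 63 set tells the CPU to keep cached translations: */
        unsigned long cr3_noflush = build_cr3_noflush(mm->pgd, asid);
}
#endif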

/*
 * We get here when we do something requiring a TLB invalidation
 * but could not go invalidate all of the contexts.  We do the
 * necessary invalidation by clearing out the 'ctx_id' which
 * forces a TLB flush when the context is next loaded.
 */
static void clear_asid_other(void)
{
        u16 asid;

        /*
         * This is only expected to be set if we have disabled
         * kernel _PAGE_GLOBAL pages.
         */
        if (!static_cpu_has(X86_FEATURE_PTI)) {
                WARN_ON_ONCE(1);
                return;
        }

        for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
                /* Do not need to flush the current asid */
                if (asid == this_cpu_read(cpu_tlbstate.loaded_mm_asid))
                        continue;
                /*
                 * Make sure the next time we go to switch to
                 * this asid, we do a flush:
                 */
                this_cpu_write(cpu_tlbstate.ctxs[asid].ctx_id, 0);
        }
        this_cpu_write(cpu_tlbstate.invalidate_other, false);
}

atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);


static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
                            u16 *new_asid, bool *need_flush)
{
        u16 asid;

        if (!static_cpu_has(X86_FEATURE_PCID)) {
                *new_asid = 0;
                *need_flush = true;
                return;
        }

        if (this_cpu_read(cpu_tlbstate.invalidate_other))
                clear_asid_other();

        for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
                if (this_cpu_read(cpu_tlbstate.ctxs[asid].ctx_id) !=
                    next->context.ctx_id)
                        continue;

                *new_asid = asid;
                *need_flush = (this_cpu_read(cpu_tlbstate.ctxs[asid].tlb_gen) <
                               next_tlb_gen);
                return;
        }

        /*
         * We don't currently own an ASID slot on this CPU.
         * Allocate a slot.
         */
        *new_asid = this_cpu_add_return(cpu_tlbstate.next_asid, 1) - 1;
        if (*new_asid >= TLB_NR_DYN_ASIDS) {
                *new_asid = 0;
                this_cpu_write(cpu_tlbstate.next_asid, 1);
        }
        *need_flush = true;
}
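
/*
 * Illustrative only: with TLB_NR_DYN_ASIDS == 6, a CPU that switches
 * between seven mm's in turn caches the first six in slots 0..5 and, on
 * the seventh, wraps next_asid around and evicts slot 0.  Switching back
 * to an mm that still owns a slot is then a cheap no-flush CR3 write as
 * long as its tlb_gen has not moved on.
 */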

/*
 * Given an ASID, flush the corresponding user ASID.  We can delay this
 * until the next time we switch to it.
 *
 * See SWITCH_TO_USER_CR3.
 */
static inline void invalidate_user_asid(u16 asid)
{
        /* There is no user ASID if address space separation is off */
        if (!IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
                return;

        /*
         * We only have a single ASID if PCID is off and the CR3
         * write will have flushed it.
         */
        if (!cpu_feature_enabled(X86_FEATURE_PCID))
                return;

        if (!static_cpu_has(X86_FEATURE_PTI))
                return;

        __set_bit(kern_pcid(asid),
                  (unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask));
}

static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush)
{
        unsigned long new_mm_cr3;

        if (need_flush) {
                invalidate_user_asid(new_asid);
                new_mm_cr3 = build_cr3(pgdir, new_asid);
        } else {
                new_mm_cr3 = build_cr3_noflush(pgdir, new_asid);
        }

        /*
         * Caution: many callers of this function expect
         * that load_cr3() is serializing and orders TLB
         * fills with respect to the mm_cpumask writes.
         */
        write_cr3(new_mm_cr3);
}

void leave_mm(int cpu)
{
        struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);

        /*
         * It's plausible that we're in lazy TLB mode while our mm is init_mm.
         * If so, our callers still expect us to flush the TLB, but there
         * aren't any user TLB entries in init_mm to worry about.
         */
        if (loaded_mm == &init_mm)
                return;

        /* Warn if we're not lazy. */
        WARN_ON(!this_cpu_read(cpu_tlbstate_shared.is_lazy));

        switch_mm(NULL, &init_mm, NULL);
}
EXPORT_SYMBOL_GPL(leave_mm);

void switch_mm(struct mm_struct *prev, struct mm_struct *next,
               struct task_struct *tsk)
{
        unsigned long flags;

        local_irq_save(flags);
        switch_mm_irqs_off(prev, next, tsk);
        local_irq_restore(flags);
}

/*
 * Invoked from return to user/guest by a task that opted in to L1D
 * flushing but ended up running on an SMT enabled core due to wrong
 * affinity settings or CPU hotplug.  This is part of the paranoid L1D
 * flush contract which the task requested.
 */
static void l1d_flush_force_sigbus(struct callback_head *ch)
{
        force_sig(SIGBUS);
}

static void l1d_flush_evaluate(unsigned long prev_mm, unsigned long next_mm,
                               struct task_struct *next)
{
        /* Flush L1D if the outgoing task requests it */
        if (prev_mm & LAST_USER_MM_L1D_FLUSH)
                wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);

        /* Check whether the incoming task opted in for L1D flush */
        if (likely(!(next_mm & LAST_USER_MM_L1D_FLUSH)))
                return;

        /*
         * Validate that it is not running on an SMT sibling as this would
         * make the exercise pointless because the siblings share L1D.  If
         * it runs on an SMT sibling, notify it with SIGBUS on return to
         * user/guest.
         */
        if (this_cpu_read(cpu_info.smt_active)) {
                clear_ti_thread_flag(&next->thread_info, TIF_SPEC_L1D_FLUSH);
                next->l1d_flush_kill.func = l1d_flush_force_sigbus;
                task_work_add(next, &next->l1d_flush_kill, TWA_RESUME);
        }
}

static unsigned long mm_mangle_tif_spec_bits(struct task_struct *next)
{
        unsigned long next_tif = task_thread_info(next)->flags;
        unsigned long spec_bits = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_SPEC_MASK;

        /*
         * Ensure that the bit shift above works as expected and the two
         * flags end up in bit 0 and bit 1.
         */
        BUILD_BUG_ON(TIF_SPEC_L1D_FLUSH != TIF_SPEC_IB + 1);

        return (unsigned long)next->mm | spec_bits;
}
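
/*
 * Illustrative only: the mangling relies on mm pointers being at least
 * word aligned, which leaves bits 0 and 1 free.  For a task with
 * TIF_SPEC_IB set and TIF_SPEC_L1D_FLUSH clear, the thread flags shifted
 * right by TIF_SPEC_IB yield 0x1, so the stored value is next->mm | 0x1
 * (LAST_USER_MM_IBPB).  Comparing such mangled values therefore detects
 * a change of either the mm or the per-task mitigation state in one
 * compare.
 */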

static void cond_mitigation(struct task_struct *next)
{
        unsigned long prev_mm, next_mm;

        if (!next || !next->mm)
                return;

        next_mm = mm_mangle_tif_spec_bits(next);
        prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_spec);

        /*
         * Avoid user/user BTB poisoning by flushing the branch predictor
         * when switching between processes.  This stops one process from
         * doing Spectre-v2 attacks on another.
         *
         * Both the conditional and the always IBPB mode use the mm
         * pointer to avoid the IBPB when switching between tasks of the
         * same process.  Using the mm pointer instead of mm->context.ctx_id
         * opens a hypothetical hole vs. mm_struct reuse, which is more or
         * less impossible to control by an attacker.  Aside of that it
         * would only affect the first schedule so the theoretically
         * exposed data is not really interesting.
         */
        if (static_branch_likely(&switch_mm_cond_ibpb)) {
                /*
                 * This is a bit more complex than the always mode because
                 * it has to handle two cases:
                 *
                 * 1) Switch from a user space task (potential attacker)
                 *    which has TIF_SPEC_IB set to a user space task
                 *    (potential victim) which has TIF_SPEC_IB not set.
                 *
                 * 2) Switch from a user space task (potential attacker)
                 *    which has TIF_SPEC_IB not set to a user space task
                 *    (potential victim) which has TIF_SPEC_IB set.
                 *
                 * This could be done by unconditionally issuing IBPB when
                 * a task which has TIF_SPEC_IB set is either scheduled in
                 * or out.  Though that results in two flushes when:
                 *
                 * - the same user space task is scheduled out and later
                 *   scheduled in again and only a kernel thread ran in
                 *   between.
                 *
                 * - a user space task belonging to the same process is
                 *   scheduled in after a kernel thread ran in between.
                 *
                 * - a user space task belonging to the same process is
                 *   scheduled in immediately.
                 *
                 * Optimize this with reasonably small overhead for the
                 * above cases.  Mangle the TIF_SPEC_IB bit into the mm
                 * pointer of the incoming task which is stored in
                 * cpu_tlbstate.last_user_mm_spec for comparison.
                 *
                 * Issue IBPB only if the mm's are different and one or
                 * both have the IBPB bit set.
                 */
                if (next_mm != prev_mm &&
                    (next_mm | prev_mm) & LAST_USER_MM_IBPB)
                        indirect_branch_prediction_barrier();
        }

        if (static_branch_unlikely(&switch_mm_always_ibpb)) {
                /*
                 * Only flush when switching to a user space task with a
                 * different context than the user space task which ran
                 * last on this CPU.
                 */
                if ((prev_mm & ~LAST_USER_MM_SPEC_MASK) !=
                                        (unsigned long)next->mm)
                        indirect_branch_prediction_barrier();
        }

        if (static_branch_unlikely(&switch_mm_cond_l1d_flush)) {
                /*
                 * Flush L1D when the outgoing task requested it and/or
                 * check whether the incoming task requested L1D flushing
                 * and ended up on an SMT sibling.
                 */
                if (unlikely((prev_mm | next_mm) & LAST_USER_MM_L1D_FLUSH))
                        l1d_flush_evaluate(prev_mm, next_mm, next);
        }

        this_cpu_write(cpu_tlbstate.last_user_mm_spec, next_mm);
}

#ifdef CONFIG_PERF_EVENTS
static inline void cr4_update_pce_mm(struct mm_struct *mm)
{
        if (static_branch_unlikely(&rdpmc_always_available_key) ||
            (!static_branch_unlikely(&rdpmc_never_available_key) &&
             atomic_read(&mm->context.perf_rdpmc_allowed))) {
                /*
                 * Clear the existing dirty counters to
                 * prevent the leak for an RDPMC task.
                 */
                perf_clear_dirty_counters();
                cr4_set_bits_irqsoff(X86_CR4_PCE);
        } else
                cr4_clear_bits_irqsoff(X86_CR4_PCE);
}

void cr4_update_pce(void *ignored)
{
        cr4_update_pce_mm(this_cpu_read(cpu_tlbstate.loaded_mm));
}

#else
static inline void cr4_update_pce_mm(struct mm_struct *mm) { }
#endif

void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
                        struct task_struct *tsk)
{
        struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
        u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
        bool was_lazy = this_cpu_read(cpu_tlbstate_shared.is_lazy);
        unsigned cpu = smp_processor_id();
        u64 next_tlb_gen;
        bool need_flush;
        u16 new_asid;

        /*
         * NB: The scheduler will call us with prev == next when switching
         * from lazy TLB mode to normal mode if active_mm isn't changing.
         * When this happens, we don't assume that CR3 (and hence
         * cpu_tlbstate.loaded_mm) matches next.
         *
         * NB: leave_mm() calls us with prev == NULL and tsk == NULL.
         */

        /* We don't want flush_tlb_func() to run concurrently with us. */
        if (IS_ENABLED(CONFIG_PROVE_LOCKING))
                WARN_ON_ONCE(!irqs_disabled());

        /*
         * Verify that CR3 is what we think it is.  This will catch
         * hypothetical buggy code that directly switches to swapper_pg_dir
         * without going through leave_mm() / switch_mm_irqs_off() or that
         * does something like write_cr3(read_cr3_pa()).
         *
         * Only do this check if CONFIG_DEBUG_VM=y because __read_cr3()
         * isn't free.
         */
#ifdef CONFIG_DEBUG_VM
        if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) {
                /*
                 * If we were to BUG here, we'd be very likely to kill
                 * the system so hard that we don't see the call trace.
                 * Try to recover instead by ignoring the error and doing
                 * a global flush to minimize the chance of corruption.
                 *
                 * (This is far from being a fully correct recovery.
                 *  Architecturally, the CPU could prefetch something
                 *  back into an incorrect ASID slot and leave it there
                 *  to cause trouble down the road.  It's better than
                 *  nothing, though.)
                 */
                __flush_tlb_all();
        }
#endif
        if (was_lazy)
                this_cpu_write(cpu_tlbstate_shared.is_lazy, false);

        /*
         * The membarrier system call requires a full memory barrier and
         * core serialization before returning to user-space, after
         * storing to rq->curr, when changing mm.  Writing to CR3
         * provides that full memory barrier and core serializing
         * instruction.
         */
        if (real_prev == next) {
                VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
                           next->context.ctx_id);

                /*
                 * Even in lazy TLB mode, the CPU should stay set in the
                 * mm_cpumask.  The TLB shootdown code can figure out from
                 * cpu_tlbstate_shared.is_lazy whether or not to send an IPI.
                 */
                if (WARN_ON_ONCE(real_prev != &init_mm &&
                                 !cpumask_test_cpu(cpu, mm_cpumask(next))))
                        cpumask_set_cpu(cpu, mm_cpumask(next));

                /*
                 * If the CPU is not in lazy TLB mode, we are just switching
                 * from one thread in a process to another thread in the
                 * same process.  No TLB flush required.
                 */
                if (!was_lazy)
                        return;

                /*
                 * Read the tlb_gen to check whether a flush is needed.
                 * If the TLB is up to date, just use it.  The barrier
                 * synchronizes with the tlb_gen increment in the TLB
                 * shootdown code.
                 */
                smp_mb();
                next_tlb_gen = atomic64_read(&next->context.tlb_gen);
                if (this_cpu_read(cpu_tlbstate.ctxs[prev_asid].tlb_gen) ==
                                next_tlb_gen)
                        return;

                /*
                 * TLB contents went out of date while we were in lazy
                 * mode.  Fall through to the TLB switching code below.
                 */
                new_asid = prev_asid;
                need_flush = true;
        } else {
                /*
                 * Apply process to process speculation vulnerability
                 * mitigations if applicable.
                 */
                cond_mitigation(tsk);

                /*
                 * Stop remote flushes for the previous mm.
                 * Skip kernel threads; we never send init_mm TLB flushing
                 * IPIs, but the bitmap manipulation can cause cache line
                 * contention.
                 */
                if (real_prev != &init_mm) {
                        VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu,
                                                mm_cpumask(real_prev)));
                        cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
                }

                /*
                 * Start remote flushes and then read tlb_gen.
                 */
                if (next != &init_mm)
                        cpumask_set_cpu(cpu, mm_cpumask(next));
                next_tlb_gen = atomic64_read(&next->context.tlb_gen);

                choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);

                /* Let nmi_uaccess_okay() know that we're changing CR3. */
                this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING);
                barrier();
        }

        if (need_flush) {
                this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
                this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
                load_new_mm_cr3(next->pgd, new_asid, true);

                trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
        } else {
                /* The new ASID is already up to date. */
                load_new_mm_cr3(next->pgd, new_asid, false);

                trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
        }

        /* Make sure we write CR3 before loaded_mm. */
        barrier();

        this_cpu_write(cpu_tlbstate.loaded_mm, next);
        this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);

        if (next != real_prev) {
                cr4_update_pce_mm(next);
                switch_ldt(real_prev, next);
        }
}

/*
 * Please ignore the name of this function.  It should be called
 * switch_to_kernel_thread().
 *
 * enter_lazy_tlb() is a hint from the scheduler that we are entering a
 * kernel thread or other context without an mm.  Acceptable implementations
 * include doing nothing whatsoever, switching to init_mm, or various clever
 * lazy tricks to try to minimize TLB flushes.
 *
 * The scheduler reserves the right to call enter_lazy_tlb() several times
 * in a row.  It will notify us that we're going back to a real mm by
 * calling switch_mm_irqs_off().
 */
void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
        if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm)
                return;

        this_cpu_write(cpu_tlbstate_shared.is_lazy, true);
}

/*
 * Call this when reinitializing a CPU.  It fixes the following potential
 * problems:
 *
 * - The ASID changed from what cpu_tlbstate thinks it is (most likely
 *   because the CPU was taken down and came back up with CR3's PCID
 *   bits clear.  CPU hotplug can do this.)
 *
 * - The TLB contains junk in slots corresponding to inactive ASIDs.
 *
 * - The CPU went so far out to lunch that it may have missed a TLB
 *   flush.
 */
void initialize_tlbstate_and_flush(void)
{
        int i;
        struct mm_struct *mm = this_cpu_read(cpu_tlbstate.loaded_mm);
        u64 tlb_gen = atomic64_read(&init_mm.context.tlb_gen);
        unsigned long cr3 = __read_cr3();

        /* Assert that CR3 already references the right mm. */
        WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd));

        /*
         * Assert that CR4.PCIDE is set if needed.  (CR4.PCIDE initialization
         * doesn't work like other CR4 bits because it can only be set from
         * long mode.)
         */
        WARN_ON(boot_cpu_has(X86_FEATURE_PCID) &&
                !(cr4_read_shadow() & X86_CR4_PCIDE));

        /* Force ASID 0 and force a TLB flush. */
        write_cr3(build_cr3(mm->pgd, 0));

        /* Reinitialize tlbstate. */
        this_cpu_write(cpu_tlbstate.last_user_mm_spec, LAST_USER_MM_INIT);
        this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
        this_cpu_write(cpu_tlbstate.next_asid, 1);
        this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
        this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, tlb_gen);

        for (i = 1; i < TLB_NR_DYN_ASIDS; i++)
                this_cpu_write(cpu_tlbstate.ctxs[i].ctx_id, 0);
}

/*
 * flush_tlb_func()'s memory ordering requirement is that any
 * TLB fills that happen after we flush the TLB are ordered after we
 * read active_mm's tlb_gen.  We don't need any explicit barriers
 * because all x86 flush operations are serializing and the
 * atomic64_read operation won't be reordered by the compiler.
 */
static void flush_tlb_func(void *info)
{
        /*
         * We have three different tlb_gen values in here.  They are:
         *
         * - mm_tlb_gen:     the latest generation.
         * - local_tlb_gen:  the generation that this CPU has already caught
         *                   up to.
         * - f->new_tlb_gen: the generation that the requester of the flush
         *                   wants us to catch up to.
         */
        const struct flush_tlb_info *f = info;
        struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
        u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
        u64 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);
        u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen);
        bool local = smp_processor_id() == f->initiating_cpu;
        unsigned long nr_invalidate = 0;

        /* This code cannot presently handle being reentered. */
        VM_WARN_ON(!irqs_disabled());

        if (!local) {
                inc_irq_stat(irq_tlb_count);
                count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);

                /* Can only happen on remote CPUs */
                if (f->mm && f->mm != loaded_mm)
                        return;
        }

        if (unlikely(loaded_mm == &init_mm))
                return;

        VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) !=
                   loaded_mm->context.ctx_id);

        if (this_cpu_read(cpu_tlbstate_shared.is_lazy)) {
                /*
                 * We're in lazy mode.  We need to at least flush our
                 * paging-structure cache to avoid speculatively reading
                 * garbage into our TLB.  Since switching to init_mm is barely
                 * slower than a minimal flush, just switch to init_mm.
                 *
                 * This should be rare, with native_flush_tlb_multi() skipping
                 * IPIs to lazy TLB mode CPUs.
                 */
                switch_mm_irqs_off(NULL, &init_mm, NULL);
                return;
        }

        if (unlikely(local_tlb_gen == mm_tlb_gen)) {
                /*
                 * There's nothing to do: we're already up to date.  This can
                 * happen if two concurrent flushes happen -- the first flush
                 * to be handled can catch us all the way up, leaving no work
                 * for the second flush.
                 */
                goto done;
        }

        WARN_ON_ONCE(local_tlb_gen > mm_tlb_gen);
        WARN_ON_ONCE(f->new_tlb_gen > mm_tlb_gen);

        /*
         * If we get to this point, we know that our TLB is out of date.
         * This does not strictly imply that we need to flush (it's
         * possible that f->new_tlb_gen <= local_tlb_gen), but we're
         * going to need to flush in the very near future, so we might
         * as well get it over with.
         *
         * The only question is whether to do a full or partial flush.
         *
         * We do a partial flush if requested and two extra conditions
         * are met:
         *
         * 1. f->new_tlb_gen == local_tlb_gen + 1.  We have an invariant that
         *    we've always done all needed flushes to catch up to
         *    local_tlb_gen.  If, for example, local_tlb_gen == 2 and
         *    f->new_tlb_gen == 3, then we know that the flush needed to bring
         *    local_tlb_gen from 2 to 3 is the partial flush we're handling.
         *
         * 2. f->new_tlb_gen == mm_tlb_gen.  This is purely an optimization.
         *    Partial TLB flushes are not all that much cheaper than full TLB
         *    flushes, so it seems unlikely that it would be a performance win
         *    to do a partial flush if that won't bring our TLB fully up to
         *    date.  By doing a full flush instead, we can increase
         *    local_tlb_gen all the way to mm_tlb_gen and we can probably
         *    avoid another flush in the very near future.
         */
        if (f->end != TLB_FLUSH_ALL &&
            f->new_tlb_gen == local_tlb_gen + 1 &&
            f->new_tlb_gen == mm_tlb_gen) {
                /* Partial flush */
                unsigned long addr = f->start;

                nr_invalidate = (f->end - f->start) >> f->stride_shift;

                while (addr < f->end) {
                        flush_tlb_one_user(addr);
                        addr += 1UL << f->stride_shift;
                }
                if (local)
                        count_vm_tlb_events(NR_TLB_LOCAL_FLUSH_ONE, nr_invalidate);
        } else {
                /* Full flush. */
                nr_invalidate = TLB_FLUSH_ALL;

                flush_tlb_local();
                if (local)
                        count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
        }

        /* Both paths above update our state to mm_tlb_gen. */
        this_cpu_write(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen, mm_tlb_gen);

        /* Tracing is done in a unified manner to reduce the code size */
done:
        trace_tlb_flush(!local ? TLB_REMOTE_SHOOTDOWN :
                                (f->mm == NULL) ? TLB_LOCAL_SHOOTDOWN :
                                                  TLB_LOCAL_MM_SHOOTDOWN,
                        nr_invalidate);
}
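
/*
 * Illustrative only: two racing flushes targeting another CPU.  Suppose
 * that CPU sits at local_tlb_gen == 1 and two requesters bump the mm to
 * mm_tlb_gen == 3.  The IPI for f->new_tlb_gen == 2 arrives first:
 * condition 1 holds (2 == 1 + 1) but condition 2 fails (2 != 3), so the
 * CPU does one full flush and jumps local_tlb_gen straight to 3.  The
 * second IPI (f->new_tlb_gen == 3) then finds local_tlb_gen == mm_tlb_gen
 * and takes the early "nothing to do" exit above.
 */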

static bool tlb_is_not_lazy(int cpu)
{
        return !per_cpu(cpu_tlbstate_shared.is_lazy, cpu);
}

static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask);

DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state_shared, cpu_tlbstate_shared);
EXPORT_PER_CPU_SYMBOL(cpu_tlbstate_shared);

STATIC_NOPV void native_flush_tlb_multi(const struct cpumask *cpumask,
                                        const struct flush_tlb_info *info)
{
        /*
         * Do accounting and tracing.  Note that there are (and have always
         * been) cases in which a remote TLB flush will be traced, but
         * eventually would not happen.
         */
        count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
        if (info->end == TLB_FLUSH_ALL)
                trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL);
        else
                trace_tlb_flush(TLB_REMOTE_SEND_IPI,
                                (info->end - info->start) >> PAGE_SHIFT);

        /*
         * If no page tables were freed, we can skip sending IPIs to
         * CPUs in lazy TLB mode.  They will flush the CPU themselves
         * at the next context switch.
         *
         * However, if page tables are getting freed, we need to send the
         * IPI everywhere, to prevent CPUs in lazy TLB mode from tripping
         * up on the new contents of what used to be page tables, while
         * doing a speculative memory access.
         */
        if (info->freed_tables) {
                on_each_cpu_mask(cpumask, flush_tlb_func, (void *)info, true);
        } else {
                /*
                 * Although we could have used on_each_cpu_cond_mask(),
                 * open-coding it has performance advantages, as it eliminates
                 * the need for indirect calls or retpolines.  In addition, it
                 * allows to use a designated cpumask for evaluating the
                 * condition, instead of allocating one.
                 *
                 * This code works under the assumption that there are no
                 * nested TLB flushes, an assumption that is already made in
                 * flush_tlb_mm_range().
                 *
                 * cond_cpumask is logically a stack-local variable, but it is
                 * more efficient to have it off the stack and not to allocate
                 * it on demand.  Preemption is disabled and this code is
                 * non-reentrant.
                 */
                struct cpumask *cond_cpumask = this_cpu_ptr(&flush_tlb_mask);
                int cpu;

                cpumask_clear(cond_cpumask);

                for_each_cpu(cpu, cpumask) {
                        if (tlb_is_not_lazy(cpu))
                                __cpumask_set_cpu(cpu, cond_cpumask);
                }
                on_each_cpu_mask(cond_cpumask, flush_tlb_func, (void *)info, true);
        }
}

void flush_tlb_multi(const struct cpumask *cpumask,
                     const struct flush_tlb_info *info)
{
        __flush_tlb_multi(cpumask, info);
}

/*
 * See Documentation/x86/tlb.rst for details.  We choose 33
 * because it is large enough to cover the vast majority (at
 * least 95%) of allocations, and is small enough that we are
 * confident it will not cause too much overhead.  Each single
 * flush is about 100 ns, so this caps the maximum overhead at
 * _about_ 3,000 ns.
 *
 * This is in units of pages.
 */
unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;

static DEFINE_PER_CPU_SHARED_ALIGNED(struct flush_tlb_info, flush_tlb_info);

#ifdef CONFIG_DEBUG_VM
static DEFINE_PER_CPU(unsigned int, flush_tlb_info_idx);
#endif

static struct flush_tlb_info *get_flush_tlb_info(struct mm_struct *mm,
                        unsigned long start, unsigned long end,
                        unsigned int stride_shift, bool freed_tables,
                        u64 new_tlb_gen)
{
        struct flush_tlb_info *info = this_cpu_ptr(&flush_tlb_info);

#ifdef CONFIG_DEBUG_VM
        /*
         * Ensure that the following code is non-reentrant and flush_tlb_info
         * is not overwritten.  This means no TLB flushing is initiated by
         * interrupt handlers and machine-check exception handlers.
         */
        BUG_ON(this_cpu_inc_return(flush_tlb_info_idx) != 1);
#endif

        info->start = start;
        info->end = end;
        info->mm = mm;
        info->stride_shift = stride_shift;
        info->freed_tables = freed_tables;
        info->new_tlb_gen = new_tlb_gen;
        info->initiating_cpu = smp_processor_id();

        return info;
}

static void put_flush_tlb_info(void)
{
#ifdef CONFIG_DEBUG_VM
        /* Complete reentrancy prevention checks */
        barrier();
        this_cpu_dec(flush_tlb_info_idx);
#endif
}
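
/*
 * Illustrative only: the get/put pair brackets one flush operation while
 * preemption is off, so the per-CPU flush_tlb_info cannot be reused or
 * migrated from underneath us.  The function below is hypothetical and
 * follows the same shape as flush_tlb_mm_range() further down:
 */
#if 0
static void flush_one_page_sketch(struct mm_struct *mm, unsigned long addr)
{
        struct flush_tlb_info *info;

        preempt_disable();
        info = get_flush_tlb_info(mm, addr, addr + PAGE_SIZE, PAGE_SHIFT,
                                  false, inc_mm_tlb_gen(mm));
        flush_tlb_multi(mm_cpumask(mm), info);
        put_flush_tlb_info();
        preempt_enable();
}
#endif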

void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
                                unsigned long end, unsigned int stride_shift,
                                bool freed_tables)
{
        struct flush_tlb_info *info;
        u64 new_tlb_gen;
        int cpu;

        cpu = get_cpu();

        /* Should we flush just the requested range? */
        if ((end == TLB_FLUSH_ALL) ||
            ((end - start) >> stride_shift) > tlb_single_page_flush_ceiling) {
                start = 0;
                end = TLB_FLUSH_ALL;
        }

        /* This is also a barrier that synchronizes with switch_mm(). */
        new_tlb_gen = inc_mm_tlb_gen(mm);

        info = get_flush_tlb_info(mm, start, end, stride_shift, freed_tables,
                                  new_tlb_gen);

        /*
         * flush_tlb_multi() is not optimized for the common case in which
         * only a local TLB flush is needed.  Optimize this use-case by
         * calling flush_tlb_func() directly in this case.
         */
        if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) {
                flush_tlb_multi(mm_cpumask(mm), info);
        } else if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) {
                lockdep_assert_irqs_enabled();
                local_irq_disable();
                flush_tlb_func(info);
                local_irq_enable();
        }

        put_flush_tlb_info();
        put_cpu();
}
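
/*
 * Illustrative only: callers pass the stride of the mapping being
 * invalidated, e.g. after changing the protection of one 4k PTE or one
 * 2M PMD in an mm.  The function below is hypothetical; real callers
 * reach this via helpers such as the flush_tlb_page() wrapper.
 */
#if 0
static void flush_examples(struct mm_struct *mm, unsigned long addr)
{
        /* One 4k page: */
        flush_tlb_mm_range(mm, addr, addr + PAGE_SIZE, PAGE_SHIFT, false);
        /* One 2M huge page: */
        flush_tlb_mm_range(mm, addr, addr + PMD_SIZE, PMD_SHIFT, false);
        /* Everything, after freeing page tables: */
        flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0, true);
}
#endif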

static void do_flush_tlb_all(void *info)
{
        count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
        __flush_tlb_all();
}

void flush_tlb_all(void)
{
        count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
        on_each_cpu(do_flush_tlb_all, NULL, 1);
}

static void do_kernel_range_flush(void *info)
{
        struct flush_tlb_info *f = info;
        unsigned long addr;

        /* flush range by one by one 'invlpg' */
        for (addr = f->start; addr < f->end; addr += PAGE_SIZE)
                flush_tlb_one_kernel(addr);
}

void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
        /* Balance as user space task's flush, a bit conservative */
        if (end == TLB_FLUSH_ALL ||
            (end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) {
                on_each_cpu(do_flush_tlb_all, NULL, 1);
        } else {
                struct flush_tlb_info *info;

                preempt_disable();
                info = get_flush_tlb_info(NULL, start, end, 0, false, 0);

                on_each_cpu(do_kernel_range_flush, info, 1);

                put_flush_tlb_info();
                preempt_enable();
        }
}
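
/*
 * Illustrative only: kernel-range flushes are for global kernel mappings,
 * typically after tearing down a vmalloc area.  The helper below is
 * hypothetical; vmalloc itself does the equivalent internally.
 */
#if 0
static void unmap_kernel_area_sketch(unsigned long va_start, unsigned long size)
{
        /* ... clear the kernel PTEs covering [va_start, va_start + size) ... */
        flush_tlb_kernel_range(va_start, va_start + size);
}
#endif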

/*
 * This can be used from process context to figure out what the value of
 * CR3 is without needing to do a (slow) __read_cr3().
 *
 * It's intended to be used for code like KVM that sneakily changes CR3
 * and needs to restore it.  It needs to be used very carefully.
 */
unsigned long __get_current_cr3_fast(void)
{
        unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
                this_cpu_read(cpu_tlbstate.loaded_mm_asid));

        /* For now, be very restrictive about when this can be called. */
        VM_WARN_ON(in_nmi() || preemptible());

        VM_BUG_ON(cr3 != __read_cr3());
        return cr3;
}
EXPORT_SYMBOL_GPL(__get_current_cr3_fast);

/*
 * Flush one page in the kernel mapping
 */
void flush_tlb_one_kernel(unsigned long addr)
{
        count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);

        /*
         * If PTI is off, the kernel and user address spaces are the same,
         * and flushing the one entry below is all that is needed.
         *
         * If PTI is on, flush_tlb_one_user() only invalidates the current
         * address space; the copies of the kernel mapping in the other
         * (user) address spaces are handled by marking them stale below.
         */
        flush_tlb_one_user(addr);

        if (!static_cpu_has(X86_FEATURE_PTI))
                return;

        /*
         * See above.  We need to propagate the flush to all other address
         * spaces.  In principle, we only need to propagate it to kernelmode
         * address spaces, but the extra bookkeeping we would need is not
         * worth it.
         */
        this_cpu_write(cpu_tlbstate.invalidate_other, true);
}

/*
 * Flush one page in the user mapping
 */
STATIC_NOPV void native_flush_tlb_one_user(unsigned long addr)
{
        u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);

        asm volatile("invlpg (%0)" ::"r" (addr) : "memory");

        if (!static_cpu_has(X86_FEATURE_PTI))
                return;

        /*
         * INVLPG above only flushed the kernel-side copy.  Invalidate the
         * PTI user address space either lazily, by marking its ASID stale,
         * or directly via INVPCID when the CPU supports single-address
         * invalidation.
         */
        if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE))
                invalidate_user_asid(loaded_mm_asid);
        else
                invpcid_flush_one(user_pcid(loaded_mm_asid), addr);
}

void flush_tlb_one_user(unsigned long addr)
{
        __flush_tlb_one_user(addr);
}

/*
 * Flush everything
 */
STATIC_NOPV void native_flush_tlb_global(void)
{
        unsigned long cr4, flags;

        if (static_cpu_has(X86_FEATURE_INVPCID)) {
                /*
                 * Using INVPCID is considerably faster than a pair of writes
                 * to CR4 sandwiched inside an IRQ flag save/restore.
                 *
                 * Note, this works with CR4.PCIDE=0 or 1.
                 */
                invpcid_flush_all();
                return;
        }

        /*
         * Read-modify-write to CR4 - protect it from preemption and
         * from interrupts.  (Use the raw variant because this code can
         * be called from deep inside debugging code.)
         */
        raw_local_irq_save(flags);

        cr4 = this_cpu_read(cpu_tlbstate.cr4);
        /* Toggling CR4.PGE flushes the TLB including global translations. */
        native_write_cr4(cr4 ^ X86_CR4_PGE);
        /* Put CR4.PGE back: */
        native_write_cr4(cr4);

        raw_local_irq_restore(flags);
}

/*
 * Flush the entire current user mapping
 */
STATIC_NOPV void native_flush_tlb_local(void)
{
        /*
         * Preemption or interrupts must be disabled to protect the
         * consistency of the loaded_mm_asid against a migration.
         */
        WARN_ON_ONCE(preemptible());

        invalidate_user_asid(this_cpu_read(cpu_tlbstate.loaded_mm_asid));

        /* If current->mm == NULL then the read_cr3() "borrows" an mm */
        native_write_cr3(__native_read_cr3());
}

void flush_tlb_local(void)
{
        __flush_tlb_local();
}

/*
 * Flush everything
 */
void __flush_tlb_all(void)
{
        /*
         * This is to catch users with enabled preemption and the PGE feature
         * and don't trigger the warning in __native_flush_tlb().
         */
        VM_WARN_ON_ONCE(preemptible());

        if (boot_cpu_has(X86_FEATURE_PGE)) {
                __flush_tlb_global();
        } else {
                /*
                 * !PGE -> !PCID (setup_pcid()), thus every flush is total.
                 */
                flush_tlb_local();
        }
}
EXPORT_SYMBOL_GPL(__flush_tlb_all);

void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
{
        struct flush_tlb_info *info;

        int cpu = get_cpu();

        info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false, 0);

        /*
         * flush_tlb_multi() is not optimized for the common case in which
         * only a local TLB flush is needed.  Optimize this use-case by
         * calling flush_tlb_func() directly in this case.
         */
        if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) {
                flush_tlb_multi(&batch->cpumask, info);
        } else if (cpumask_test_cpu(cpu, &batch->cpumask)) {
                lockdep_assert_irqs_enabled();
                local_irq_disable();
                flush_tlb_func(info);
                local_irq_enable();
        }

        cpumask_clear(&batch->cpumask);

        put_flush_tlb_info();
        put_cpu();
}

/*
 * Blindly accessing user memory from NMI context can be dangerous
 * if we're in the middle of switching the current user task or
 * switching the loaded mm.  It can also be dangerous if we
 * interrupted some kernel code that was temporarily using a
 * different mm.
 */
bool nmi_uaccess_okay(void)
{
        struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
        struct mm_struct *current_mm = current->mm;

        VM_WARN_ON_ONCE(!loaded_mm);

        /*
         * The condition we want to check is
         * current_mm->pgd == __va(read_cr3_pa()).  This may be slow, though,
         * if we're running in a VM with shadow paging, and nmi_uaccess_okay()
         * is supposed to be reasonably fast.
         *
         * Instead, we check the almost equivalent but somewhat conservative
         * condition below, and we rely on the fact that switch_mm_irqs_off()
         * sets loaded_mm to LOADED_MM_SWITCHING before writing to CR3.
         */
        if (loaded_mm != current_mm)
                return false;

        VM_WARN_ON_ONCE(current_mm->pgd != __va(read_cr3_pa()));

        return true;
}

static ssize_t tlbflush_read_file(struct file *file, char __user *user_buf,
                             size_t count, loff_t *ppos)
{
        char buf[32];
        unsigned int len;

        /* %lu: tlb_single_page_flush_ceiling is an unsigned long. */
        len = sprintf(buf, "%lu\n", tlb_single_page_flush_ceiling);
        return simple_read_from_buffer(user_buf, count, ppos, buf, len);
}

static ssize_t tlbflush_write_file(struct file *file,
                 const char __user *user_buf, size_t count, loff_t *ppos)
{
        char buf[32];
        ssize_t len;
        int ceiling;

        len = min(count, sizeof(buf) - 1);
        if (copy_from_user(buf, user_buf, len))
                return -EFAULT;

        buf[len] = '\0';
        if (kstrtoint(buf, 0, &ceiling))
                return -EINVAL;

        if (ceiling < 0)
                return -EINVAL;

        tlb_single_page_flush_ceiling = ceiling;
        return count;
}

static const struct file_operations fops_tlbflush = {
        .read = tlbflush_read_file,
        .write = tlbflush_write_file,
        .llseek = default_llseek,
};

static int __init create_tlb_single_page_flush_ceiling(void)
{
        debugfs_create_file("tlb_single_page_flush_ceiling", S_IRUSR | S_IWUSR,
                            arch_debugfs_dir, NULL, &fops_tlbflush);
        return 0;
}
late_initcall(create_tlb_single_page_flush_ceiling);
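
/*
 * Usage note (illustrative): with debugfs mounted, the ceiling can be
 * inspected and tuned at runtime, e.g.:
 *
 *	cat /sys/kernel/debug/x86/tlb_single_page_flush_ceiling
 *	echo 50 > /sys/kernel/debug/x86/tlb_single_page_flush_ceiling
 *
 * Ranged flushes covering more pages than this fall back to a full flush.
 */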