#include <linux/init.h>

#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/export.h>
#include <linux/cpu.h>
#include <linux/debugfs.h>
#include <linux/sched/smt.h>
#include <linux/task_work.h>

#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/nospec-branch.h>
#include <asm/cache.h>
#include <asm/cacheflush.h>
#include <asm/apic.h>
#include <asm/perf_event.h>

#include "mm_internal.h"

#ifdef CONFIG_PARAVIRT
# define STATIC_NOPV
#else
# define STATIC_NOPV			static
# define __flush_tlb_local		native_flush_tlb_local
# define __flush_tlb_global		native_flush_tlb_global
# define __flush_tlb_one_user(addr)	native_flush_tlb_one_user(addr)
# define __flush_tlb_multi(msk, info)	native_flush_tlb_multi(msk, info)
#endif
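
/*
 * A note on the pattern above (a sketch of the intent, inferred from the
 * macros rather than stated elsewhere in this file): with CONFIG_PARAVIRT,
 * the native_flush_tlb_*() functions keep external linkage so the paravirt
 * machinery can route flush_tlb_*() calls through pv_ops.  Without it, the
 * natives become static and the __flush_tlb_*() names used below collapse
 * directly onto them, so the wrappers compile down to plain calls.
 */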

/*
 * Bits used to mangle the TIF_SPEC_IB and TIF_SPEC_L1D_FLUSH state into
 * the low bits of the mm pointer that is stored in
 * cpu_tlbstate.last_user_mm_spec.
 */
#define LAST_USER_MM_IBPB	0x1UL
#define LAST_USER_MM_L1D_FLUSH	0x2UL
#define LAST_USER_MM_SPEC_MASK	(LAST_USER_MM_IBPB | LAST_USER_MM_L1D_FLUSH)

/* Bits to set when tlbstate and flush is (re)initialized */
#define LAST_USER_MM_INIT	LAST_USER_MM_IBPB
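
/*
 * Illustration (not a definition from this file): mm_struct pointers are
 * at least word-aligned, so the two low bits of the pointer are always
 * zero and can carry LAST_USER_MM_IBPB and LAST_USER_MM_L1D_FLUSH without
 * ambiguity; masking with ~LAST_USER_MM_SPEC_MASK recovers the pointer.
 */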

/*
 * The x86 feature is called PCID (Process Context IDentifier). It is
 * similar to what is traditionally called ASID on RISC processors.
 *
 * Rather than giving every mm its own hardware context, a small per-CPU
 * array of ASIDs caches the last few mms that ran on this CPU, which
 * makes switch_mm() between those mms cheap. Three distinct numbering
 * spaces result:
 *
 * ASID  - [0, TLB_NR_DYN_ASIDS-1]
 *         the canonical, zero-based identifier for an mm's slot on this CPU
 *
 * kPCID - [1, TLB_NR_DYN_ASIDS]
 *         the value written into the PCID bits of CR3; this is ASID+1
 *         because PCID 0 is reserved (see kern_pcid())
 *
 * uPCID - the kPCID with X86_CR3_PTI_PCID_USER_BIT set; used for the
 *         userspace page tables under PAGE_TABLE_ISOLATION (see user_pcid())
 */

/* There are 12 bits of space for ASIDs in CR3 */
#define CR3_HW_ASID_BITS	12

/*
 * When enabled, PAGE_TABLE_ISOLATION consumes a single bit for
 * user/kernel switches.
 */
#ifdef CONFIG_PAGE_TABLE_ISOLATION
# define PTI_CONSUMED_PCID_BITS	1
#else
# define PTI_CONSUMED_PCID_BITS	0
#endif

#define CR3_AVAIL_PCID_BITS (X86_CR3_PCID_BITS - PTI_CONSUMED_PCID_BITS)

/*
 * ASIDs are zero-based: 0..MAX_ASID_AVAILABLE are valid.  One "-1" below
 * accounts for them being zero-based; the other is because PCID 0 is
 * reserved for use by PCID-unaware code.
 */
#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_PCID_BITS) - 2)
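
/*
 * Worked example of the arithmetic above: with PAGE_TABLE_ISOLATION,
 * CR3_AVAIL_PCID_BITS = 12 - 1 = 11 and MAX_ASID_AVAILABLE =
 * (1 << 11) - 2 = 2046; without it, (1 << 12) - 2 = 4094.  Only
 * TLB_NR_DYN_ASIDS of these are actually used by the dynamic scheme.
 */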

/*
 * Given @asid, compute the corresponding kPCID.
 */
static inline u16 kern_pcid(u16 asid)
{
	VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);

#ifdef CONFIG_PAGE_TABLE_ISOLATION
	/*
	 * Make sure that the dynamic ASID space does not conflict with the
	 * bit we are using to switch between user and kernel ASIDs.
	 */
	BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_PCID_USER_BIT));

	/*
	 * The ASID being passed in here should have respected
	 * MAX_ASID_AVAILABLE and thus never have the switch bit set.
	 */
	VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_PCID_USER_BIT));
#endif
	/*
	 * If PCID is on, ASID-aware code paths put the ASID+1 into the
	 * PCID bits.  Among other things, this keeps PCID-unaware code
	 * that saves and restores CR3 (loading PCID 0 in between) from
	 * silently corrupting the TLB entries of ASID 0, and makes bugs
	 * that load a PCID-enabled CR3 with CR4.PCIDE off trigger
	 * deterministically.
	 */
	return asid + 1;
}

/*
 * Given @asid, compute the corresponding uPCID.
 */
static inline u16 user_pcid(u16 asid)
{
	u16 ret = kern_pcid(asid);
#ifdef CONFIG_PAGE_TABLE_ISOLATION
	ret |= 1 << X86_CR3_PTI_PCID_USER_BIT;
#endif
	return ret;
}

static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
{
	if (static_cpu_has(X86_FEATURE_PCID)) {
		return __sme_pa(pgd) | kern_pcid(asid);
	} else {
		VM_WARN_ON_ONCE(asid != 0);
		return __sme_pa(pgd);
	}
}

static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
{
	VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
	/*
	 * Use boot_cpu_has() instead of this_cpu_has() as this function
	 * might be called during early boot.  This should work even after
	 * boot because all CPUs should have the same capabilities.
	 */
	VM_WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_PCID));
	return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH;
}

/*
 * We get here when we do something requiring a TLB invalidation
 * but could not go invalidate all of the contexts.  We do the
 * necessary invalidation by clearing out the 'ctx_id' which
 * forces a TLB flush when the context is loaded.
 */
static void clear_asid_other(void)
{
	u16 asid;

	/*
	 * This is only expected to be set if we have disabled
	 * kernel _PAGE_GLOBAL pages.
	 */
	if (!static_cpu_has(X86_FEATURE_PTI)) {
		WARN_ON_ONCE(1);
		return;
	}

	for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
		/* Do not need to flush the current asid */
		if (asid == this_cpu_read(cpu_tlbstate.loaded_mm_asid))
			continue;
		/*
		 * Make sure the next time we go to switch to
		 * this asid, we do a flush:
		 */
		this_cpu_write(cpu_tlbstate.ctxs[asid].ctx_id, 0);
	}
	this_cpu_write(cpu_tlbstate.invalidate_other, false);
}

/* Incremented to hand out a unique ctx_id for each new mm (see init_new_context()). */
atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);

static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
			    u16 *new_asid, bool *need_flush)
{
	u16 asid;

	if (!static_cpu_has(X86_FEATURE_PCID)) {
		*new_asid = 0;
		*need_flush = true;
		return;
	}

	if (this_cpu_read(cpu_tlbstate.invalidate_other))
		clear_asid_other();

	for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
		if (this_cpu_read(cpu_tlbstate.ctxs[asid].ctx_id) !=
		    next->context.ctx_id)
			continue;

		*new_asid = asid;
		*need_flush = (this_cpu_read(cpu_tlbstate.ctxs[asid].tlb_gen) <
			       next_tlb_gen);
		return;
	}

	/*
	 * We don't currently own an ASID slot on this CPU.
	 * Allocate a slot.
	 */
	*new_asid = this_cpu_add_return(cpu_tlbstate.next_asid, 1) - 1;
	if (*new_asid >= TLB_NR_DYN_ASIDS) {
		*new_asid = 0;
		this_cpu_write(cpu_tlbstate.next_asid, 1);
	}
	*need_flush = true;
}
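
/*
 * Note on the allocation above: next_asid rotates through the dynamic
 * slots, so a newly-seen mm evicts the oldest cached context.  When the
 * counter runs past the last slot, allocation wraps back to slot 0 and
 * next_asid is reset to 1.
 */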

/*
 * Given an ASID, flush the corresponding user ASID.  We can delay this
 * until the next time we switch to it.
 *
 * See SWITCH_TO_USER_CR3.
 */
static inline void invalidate_user_asid(u16 asid)
{
	/* There is no user ASID if address space separation is off */
	if (!IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
		return;

	/*
	 * We only have a single ASID if PCID is off and the CR3
	 * write will have flushed it.
	 */
	if (!cpu_feature_enabled(X86_FEATURE_PCID))
		return;

	if (!static_cpu_has(X86_FEATURE_PTI))
		return;

	__set_bit(kern_pcid(asid),
		  (unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask));
}

static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush)
{
	unsigned long new_mm_cr3;

	if (need_flush) {
		invalidate_user_asid(new_asid);
		new_mm_cr3 = build_cr3(pgdir, new_asid);
	} else {
		new_mm_cr3 = build_cr3_noflush(pgdir, new_asid);
	}

	/*
	 * Caution: many callers of this function expect
	 * that load_cr3() is serializing and orders TLB
	 * fills with respect to the mm_cpumask writes.
	 */
	write_cr3(new_mm_cr3);
}

void leave_mm(int cpu)
{
	struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);

	/*
	 * It's plausible that we're in lazy TLB mode while our mm is init_mm.
	 * If so, our callers still expect us to flush the TLB, but there
	 * aren't any user TLB entries in init_mm to worry about.
	 *
	 * This needs to happen before any other sanity checks due to
	 * intel_idle's shenanigans.
	 */
	if (loaded_mm == &init_mm)
		return;

	/* Warn if we're not lazy. */
	WARN_ON(!this_cpu_read(cpu_tlbstate_shared.is_lazy));

	switch_mm(NULL, &init_mm, NULL);
}
EXPORT_SYMBOL_GPL(leave_mm);

void switch_mm(struct mm_struct *prev, struct mm_struct *next,
	       struct task_struct *tsk)
{
	unsigned long flags;

	local_irq_save(flags);
	switch_mm_irqs_off(prev, next, tsk);
	local_irq_restore(flags);
}

/*
 * Invoked from return to user/guest by a task that opted into L1D
 * flushing but ended up running on an SMT enabled core due to wrong
 * affinity settings or CPU hotplug. This is part of the paranoid L1D
 * flush contract which strictly forbids running on an SMT sibling.
 */
static void l1d_flush_force_sigbus(struct callback_head *ch)
{
	force_sig(SIGBUS);
}

static void l1d_flush_evaluate(unsigned long prev_mm, unsigned long next_mm,
			       struct task_struct *next)
{
	/* Flush L1D if the outgoing task requests it */
	if (prev_mm & LAST_USER_MM_L1D_FLUSH)
		wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);

	/* Check whether the incoming task opted in for L1D flush */
	if (likely(!(next_mm & LAST_USER_MM_L1D_FLUSH)))
		return;

	/*
	 * Validate that the task is not running on an SMT sibling, as
	 * that would make the exercise pointless: siblings share the
	 * L1D. If it is, notify it with SIGBUS on return to user/guest.
	 */
	if (this_cpu_read(cpu_info.smt_active)) {
		clear_ti_thread_flag(&next->thread_info, TIF_SPEC_L1D_FLUSH);
		next->l1d_flush_kill.func = l1d_flush_force_sigbus;
		task_work_add(next, &next->l1d_flush_kill, TWA_RESUME);
	}
}

static unsigned long mm_mangle_tif_spec_bits(struct task_struct *next)
{
	unsigned long next_tif = read_task_thread_flags(next);
	unsigned long spec_bits = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_SPEC_MASK;

	/*
	 * Ensure that the bit shift above works as expected and the two
	 * flags end up in bit 0 and bit 1.
	 */
	BUILD_BUG_ON(TIF_SPEC_L1D_FLUSH != TIF_SPEC_IB + 1);

	return (unsigned long)next->mm | spec_bits;
}
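
/*
 * Worked example: shifting the thread flags right by TIF_SPEC_IB moves
 * TIF_SPEC_IB to bit 0 and TIF_SPEC_L1D_FLUSH (the adjacent flag, per the
 * BUILD_BUG_ON above) to bit 1, which is exactly where LAST_USER_MM_IBPB
 * and LAST_USER_MM_L1D_FLUSH live in the mangled mm pointer.
 */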

static void cond_mitigation(struct task_struct *next)
{
	unsigned long prev_mm, next_mm;

	if (!next || !next->mm)
		return;

	next_mm = mm_mangle_tif_spec_bits(next);
	prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_spec);

	/*
	 * Avoid user/user BTB poisoning by flushing the branch predictor
	 * when switching between processes. This stops one process from
	 * doing Spectre-v2 attacks on another.
	 *
	 * Both the conditional and the always IBPB mode use the mm
	 * pointer to avoid the IBPB when switching between tasks of the
	 * same process. Using the mm pointer instead of mm->context.ctx_id
	 * opens a hypothetical hole vs. mm_struct reuse, which is more or
	 * less impossible to control by an attacker. Aside from that it
	 * would only affect the first schedule, so the theoretically
	 * exposed data is not really interesting.
	 */
	if (static_branch_likely(&switch_mm_cond_ibpb)) {
		/*
		 * This is a bit more complex than the always mode because
		 * it has to handle two cases:
		 *
		 * 1) Switch from a user space task (potential attacker)
		 *    which has TIF_SPEC_IB set to a user space task
		 *    (potential victim) which has TIF_SPEC_IB not set.
		 *
		 * 2) Switch from a user space task (potential attacker)
		 *    which has TIF_SPEC_IB not set to a user space task
		 *    (potential victim) which has TIF_SPEC_IB set.
		 *
		 * This could be done by unconditionally issuing IBPB when
		 * a task which has TIF_SPEC_IB set is either scheduled in
		 * or out, but that would issue redundant barriers when,
		 * e.g., a kernel thread or a task of the same process ran
		 * in between.
		 *
		 * Instead, the TIF_SPEC_IB bit is mangled into the mm
		 * pointer of the incoming task (stored in
		 * cpu_tlbstate.last_user_mm_spec for comparison) and IBPB
		 * is only issued if the mms differ and one or both have
		 * the IBPB bit set.
		 */
		if (next_mm != prev_mm &&
		    (next_mm | prev_mm) & LAST_USER_MM_IBPB)
			indirect_branch_prediction_barrier();
	}

	if (static_branch_unlikely(&switch_mm_always_ibpb)) {
		/*
		 * Only flush when switching to a user space task with a
		 * different context than the user space task which ran
		 * last on this CPU.
		 */
		if ((prev_mm & ~LAST_USER_MM_SPEC_MASK) !=
					(unsigned long)next->mm)
			indirect_branch_prediction_barrier();
	}

	if (static_branch_unlikely(&switch_mm_cond_l1d_flush)) {
		/*
		 * Flush L1D when the outgoing task requested it and/or
		 * check whether the incoming task requested L1D flushing
		 * and ended up on an SMT sibling.
		 */
		if (unlikely((prev_mm | next_mm) & LAST_USER_MM_L1D_FLUSH))
			l1d_flush_evaluate(prev_mm, next_mm, next);
	}

	this_cpu_write(cpu_tlbstate.last_user_mm_spec, next_mm);
}

#ifdef CONFIG_PERF_EVENTS
static inline void cr4_update_pce_mm(struct mm_struct *mm)
{
	if (static_branch_unlikely(&rdpmc_always_available_key) ||
	    (!static_branch_unlikely(&rdpmc_never_available_key) &&
	     atomic_read(&mm->context.perf_rdpmc_allowed))) {
		/*
		 * Clear the existing dirty counters to
		 * prevent the leak for an RDPMC task.
		 */
		perf_clear_dirty_counters();
		cr4_set_bits_irqsoff(X86_CR4_PCE);
	} else
		cr4_clear_bits_irqsoff(X86_CR4_PCE);
}

void cr4_update_pce(void *ignored)
{
	cr4_update_pce_mm(this_cpu_read(cpu_tlbstate.loaded_mm));
}

#else
static inline void cr4_update_pce_mm(struct mm_struct *mm) { }
#endif

void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
			struct task_struct *tsk)
{
	struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
	u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
	bool was_lazy = this_cpu_read(cpu_tlbstate_shared.is_lazy);
	unsigned cpu = smp_processor_id();
	u64 next_tlb_gen;
	bool need_flush;
	u16 new_asid;

	/*
	 * NB: The scheduler will call us with prev == next when switching
	 * from lazy TLB mode to normal mode if active_mm isn't changing.
	 * When this happens, we don't assume that CR3 (and hence
	 * cpu_tlbstate.loaded_mm) matches next.
	 *
	 * NB: leave_mm() calls us with prev == NULL and tsk == NULL.
	 */

	/* We don't want flush_tlb_func() to run concurrently with us. */
	if (IS_ENABLED(CONFIG_PROVE_LOCKING))
		WARN_ON_ONCE(!irqs_disabled());

	/*
	 * Verify that CR3 is what we think it is.  This will catch
	 * hypothetical buggy code that directly switches to swapper_pg_dir
	 * without going through leave_mm() / switch_mm_irqs_off() or that
	 * does something like write_cr3(read_cr3_pa()).
	 *
	 * Only do this check if CONFIG_DEBUG_VM=y because __read_cr3()
	 * isn't free.
	 */
#ifdef CONFIG_DEBUG_VM
	if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) {
		/*
		 * If we were to BUG here, we'd be very likely to kill
		 * the system so hard that we don't see the call trace.
		 * Try to recover instead by ignoring the error and doing
		 * a global flush to minimize the chance of corruption.
		 */
		__flush_tlb_all();
	}
#endif
	if (was_lazy)
		this_cpu_write(cpu_tlbstate_shared.is_lazy, false);

	/*
	 * The membarrier system call requires a full memory barrier and
	 * core serialization before returning to user-space, after
	 * storing to rq->curr, when changing mm.  Writing to CR3 provides
	 * that full memory barrier and core serializing instruction.
	 */
	if (real_prev == next) {
		VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
			   next->context.ctx_id);

		/*
		 * Even in lazy TLB mode, the CPU should stay set in the
		 * mm_cpumask. The TLB shootdown code can figure out from
		 * cpu_tlbstate_shared.is_lazy whether or not to send an IPI.
		 */
		if (WARN_ON_ONCE(real_prev != &init_mm &&
				 !cpumask_test_cpu(cpu, mm_cpumask(next))))
			cpumask_set_cpu(cpu, mm_cpumask(next));

		/*
		 * If the CPU is not in lazy TLB mode, we are just switching
		 * from one thread in a process to another thread in the
		 * same process. No TLB flush required.
		 */
		if (!was_lazy)
			return;

		/*
		 * Read the tlb_gen to check whether a flush is needed.
		 * If the TLB is up to date, just use it.  The barrier
		 * synchronizes with the tlb_gen increment in the TLB
		 * shootdown code.
		 */
		smp_mb();
		next_tlb_gen = atomic64_read(&next->context.tlb_gen);
		if (this_cpu_read(cpu_tlbstate.ctxs[prev_asid].tlb_gen) ==
		    next_tlb_gen)
			return;

		/*
		 * TLB contents went out of date while we were in lazy
		 * mode. Fall through to the TLB switching code below.
		 */
		new_asid = prev_asid;
		need_flush = true;
	} else {
		/*
		 * Apply process to process speculation vulnerability
		 * mitigations if applicable.
		 */
		cond_mitigation(tsk);

		/*
		 * Stop remote flushes for the previous mm.
		 * Skip kernel threads; we never send init_mm TLB flushing
		 * IPIs, but the bitmap manipulation can cause cache line
		 * contention.
		 */
		if (real_prev != &init_mm) {
			VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu,
						mm_cpumask(real_prev)));
			cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
		}

		/*
		 * Start remote flushes and then read tlb_gen.
		 */
		if (next != &init_mm)
			cpumask_set_cpu(cpu, mm_cpumask(next));
		next_tlb_gen = atomic64_read(&next->context.tlb_gen);

		choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);

		/* Let nmi_uaccess_okay() know that we're changing CR3. */
		this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING);
		barrier();
	}

	if (need_flush) {
		this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
		this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
		load_new_mm_cr3(next->pgd, new_asid, true);

		trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
	} else {
		/* The new ASID is already up to date. */
		load_new_mm_cr3(next->pgd, new_asid, false);

		trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
	}

	/* Make sure we write CR3 before loaded_mm. */
	barrier();

	this_cpu_write(cpu_tlbstate.loaded_mm, next);
	this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);

	if (next != real_prev) {
		cr4_update_pce_mm(next);
		switch_ldt(real_prev, next);
	}
}

/*
 * enter_lazy_tlb() is a hint from the scheduler that we are entering a
 * kernel thread or other context without an mm.  Acceptable implementations
 * include doing nothing whatsoever, switching to init_mm, or various clever
 * lazy tricks to try to minimize TLB flushes.
 *
 * The scheduler reserves the right to call enter_lazy_tlb() several times
 * in a row.  It will notify us that we're going back to a real mm by
 * calling switch_mm_irqs_off().
 */
void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
	if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm)
		return;

	this_cpu_write(cpu_tlbstate_shared.is_lazy, true);
}

/*
 * Call this when reinitializing a CPU.  It fixes the following potential
 * problems:
 *
 * - The ASID changed from what cpu_tlbstate thinks it is (most likely
 *   because the CPU was taken down and came back up with CR3's PCID
 *   bits clear.  CPU hotplug can do this.)
 *
 * - The TLB contains junk in slots corresponding to inactive ASIDs.
 *
 * - The CPU went so far out to lunch that it may have missed a TLB
 *   flush.
 */
void initialize_tlbstate_and_flush(void)
{
	int i;
	struct mm_struct *mm = this_cpu_read(cpu_tlbstate.loaded_mm);
	u64 tlb_gen = atomic64_read(&init_mm.context.tlb_gen);
	unsigned long cr3 = __read_cr3();

	/* Assert that CR3 already references the right mm. */
	WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd));

	/*
	 * Assert that CR4.PCIDE is set if needed.  (CR4.PCIDE initialization
	 * doesn't work like other CR4 bits because it can only be set from
	 * long mode.)
	 */
	WARN_ON(boot_cpu_has(X86_FEATURE_PCID) &&
		!(cr4_read_shadow() & X86_CR4_PCIDE));

	/* Force ASID 0 and force a TLB flush. */
	write_cr3(build_cr3(mm->pgd, 0));

	/* Reinitialize tlbstate. */
	this_cpu_write(cpu_tlbstate.last_user_mm_spec, LAST_USER_MM_INIT);
	this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
	this_cpu_write(cpu_tlbstate.next_asid, 1);
	this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
	this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, tlb_gen);

	for (i = 1; i < TLB_NR_DYN_ASIDS; i++)
		this_cpu_write(cpu_tlbstate.ctxs[i].ctx_id, 0);
}

/*
 * flush_tlb_func()'s memory ordering requirement is that any TLB fills
 * that happen after we flush the TLB are ordered after we read
 * active_mm's tlb_gen.  We don't need any explicit barriers because all
 * x86 flush operations are serializing and the atomic64_read operation
 * won't be reordered by the compiler.
 */
static void flush_tlb_func(void *info)
{
	/*
	 * We have three different tlb_gen values in here.  They are:
	 *
	 * - mm_tlb_gen:     the latest generation.
	 * - local_tlb_gen:  the generation that this CPU has already caught
	 *                   up to.
	 * - f->new_tlb_gen: the generation that the requester of the flush
	 *                   wants us to catch up to.
	 */
	const struct flush_tlb_info *f = info;
	struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
	u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
	u64 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);
	u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen);
	bool local = smp_processor_id() == f->initiating_cpu;
	unsigned long nr_invalidate = 0;

	/* This code cannot presently handle being reentered. */
	VM_WARN_ON(!irqs_disabled());

	if (!local) {
		inc_irq_stat(irq_tlb_count);
		count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);

		/* Can only happen on remote CPUs */
		if (f->mm && f->mm != loaded_mm)
			return;
	}

	if (unlikely(loaded_mm == &init_mm))
		return;

	VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) !=
		   loaded_mm->context.ctx_id);

	if (this_cpu_read(cpu_tlbstate_shared.is_lazy)) {
		/*
		 * We're in lazy mode.  We need to at least flush our
		 * paging-structure cache to avoid speculatively reading
		 * garbage into our TLB.  Since switching to init_mm is barely
		 * slower than a minimal flush, just switch to init_mm.
		 *
		 * This should be rare, with native_flush_tlb_multi() skipping
		 * IPIs to lazy TLB mode CPUs.
		 */
		switch_mm_irqs_off(NULL, &init_mm, NULL);
		return;
	}

	if (unlikely(local_tlb_gen == mm_tlb_gen)) {
		/*
		 * There's nothing to do: we're already up to date.  This can
		 * happen if two concurrent flushes happen -- the first flush
		 * to be handled can catch us all the way up, leaving no work
		 * for the second flush.
		 */
		goto done;
	}

	WARN_ON_ONCE(local_tlb_gen > mm_tlb_gen);
	WARN_ON_ONCE(f->new_tlb_gen > mm_tlb_gen);

	/*
	 * If we get to this point, we know that our TLB is out of date.
	 * This does not strictly imply that we need to flush (it's
	 * possible that f->new_tlb_gen <= local_tlb_gen), but we're
	 * going to need to flush in the very near future, so we might
	 * as well get it over with.
	 *
	 * The only question is whether to do a full or partial flush.  We
	 * do a partial flush if requested and two extra conditions are met:
	 *
	 * 1. f->new_tlb_gen == local_tlb_gen + 1, so the partial flush
	 *    brings this CPU exactly up to date: tlb_gen is incremented
	 *    before a flush is requested, so no generation is skipped.
	 *
	 * 2. f->new_tlb_gen == mm_tlb_gen.  This is purely an optimization:
	 *    if another flush request is already pending, doing a full
	 *    flush now saves the near-certain follow-up flush.
	 */
	if (f->end != TLB_FLUSH_ALL &&
	    f->new_tlb_gen == local_tlb_gen + 1 &&
	    f->new_tlb_gen == mm_tlb_gen) {
		/* Partial flush */
		unsigned long addr = f->start;

		nr_invalidate = (f->end - f->start) >> f->stride_shift;

		while (addr < f->end) {
			flush_tlb_one_user(addr);
			addr += 1UL << f->stride_shift;
		}
		if (local)
			count_vm_tlb_events(NR_TLB_LOCAL_FLUSH_ONE, nr_invalidate);
	} else {
		/* Full flush. */
		nr_invalidate = TLB_FLUSH_ALL;

		flush_tlb_local();
		if (local)
			count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
	}

	/* Both paths above have flushed everything up to mm_tlb_gen. */
	this_cpu_write(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen, mm_tlb_gen);

	/* Tracing is done in a unified manner to reduce the code size */
done:
	trace_tlb_flush(!local ? TLB_REMOTE_SHOOTDOWN :
				(f->mm == NULL) ? TLB_LOCAL_SHOOTDOWN :
						  TLB_LOCAL_MM_SHOOTDOWN,
			nr_invalidate);
}

static bool tlb_is_not_lazy(int cpu, void *data)
{
	return !per_cpu(cpu_tlbstate_shared.is_lazy, cpu);
}

DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state_shared, cpu_tlbstate_shared);
EXPORT_PER_CPU_SYMBOL(cpu_tlbstate_shared);

STATIC_NOPV void native_flush_tlb_multi(const struct cpumask *cpumask,
					const struct flush_tlb_info *info)
{
	/*
	 * Do accounting and tracing.  Note that there are (and have always
	 * been) cases in which a remote TLB flush will be traced, but
	 * eventually would not happen.
	 */
	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
	if (info->end == TLB_FLUSH_ALL)
		trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL);
	else
		trace_tlb_flush(TLB_REMOTE_SEND_IPI,
				(info->end - info->start) >> PAGE_SHIFT);

	/*
	 * If no page tables were freed, we can skip sending IPIs to
	 * CPUs in lazy TLB mode. They will flush the CPU themselves
	 * at the next context switch.
	 *
	 * However, if page tables are getting freed, we need to send the
	 * IPI everywhere, to prevent CPUs in lazy TLB mode from tripping
	 * up on the new contents of the page tables while doing a
	 * speculative page-table walk.
	 */
	if (info->freed_tables)
		on_each_cpu_mask(cpumask, flush_tlb_func, (void *)info, true);
	else
		on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func,
				      (void *)info, 1, cpumask);
}

void flush_tlb_multi(const struct cpumask *cpumask,
		     const struct flush_tlb_info *info)
{
	__flush_tlb_multi(cpumask, info);
}

/*
 * See Documentation/x86/tlb.rst for details.  We choose 33 because it is
 * large enough to cover the vast majority (at least 95%) of allocations,
 * and is small enough that we are confident it will not cause too much
 * overhead.  Each single flush is about 100 ns, so this caps the maximum
 * overhead at _about_ 3,000 ns.
 *
 * This is in units of pages.
 */
unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;

static DEFINE_PER_CPU_SHARED_ALIGNED(struct flush_tlb_info, flush_tlb_info);

#ifdef CONFIG_DEBUG_VM
static DEFINE_PER_CPU(unsigned int, flush_tlb_info_idx);
#endif

static struct flush_tlb_info *get_flush_tlb_info(struct mm_struct *mm,
			unsigned long start, unsigned long end,
			unsigned int stride_shift, bool freed_tables,
			u64 new_tlb_gen)
{
	struct flush_tlb_info *info = this_cpu_ptr(&flush_tlb_info);

#ifdef CONFIG_DEBUG_VM
	/*
	 * Ensure that the following code is non-reentrant and flush_tlb_info
	 * is not overwritten. This means no TLB flushing is initiated by
	 * interrupt handlers and machine-check exception handlers.
	 */
	BUG_ON(this_cpu_inc_return(flush_tlb_info_idx) != 1);
#endif

	info->start		= start;
	info->end		= end;
	info->mm		= mm;
	info->stride_shift	= stride_shift;
	info->freed_tables	= freed_tables;
	info->new_tlb_gen	= new_tlb_gen;
	info->initiating_cpu	= smp_processor_id();

	return info;
}

static void put_flush_tlb_info(void)
{
#ifdef CONFIG_DEBUG_VM
	/* Complete reentrancy prevention checks */
	barrier();
	this_cpu_dec(flush_tlb_info_idx);
#endif
}

void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
				unsigned long end, unsigned int stride_shift,
				bool freed_tables)
{
	struct flush_tlb_info *info;
	u64 new_tlb_gen;
	int cpu;

	cpu = get_cpu();

	/* Should we flush just the requested range? */
	if ((end == TLB_FLUSH_ALL) ||
	    ((end - start) >> stride_shift) > tlb_single_page_flush_ceiling) {
		start = 0;
		end = TLB_FLUSH_ALL;
	}

	/* This is also a barrier that synchronizes with switch_mm(). */
	new_tlb_gen = inc_mm_tlb_gen(mm);

	info = get_flush_tlb_info(mm, start, end, stride_shift, freed_tables,
				  new_tlb_gen);

	/*
	 * flush_tlb_multi() is not optimized for the common case in which
	 * only a local TLB flush is needed. Optimize this use-case by calling
	 * flush_tlb_func() directly in this case.
	 */
	if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) {
		flush_tlb_multi(mm_cpumask(mm), info);
	} else if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) {
		lockdep_assert_irqs_enabled();
		local_irq_disable();
		flush_tlb_func(info);
		local_irq_enable();
	}

	put_flush_tlb_info();
	put_cpu();
}
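
/*
 * Usage sketch (illustrative, mirroring the flush_tlb_page() wrapper in
 * <asm/tlbflush.h>): flushing one base page of an mm boils down to
 *
 *	flush_tlb_mm_range(vma->vm_mm, addr, addr + PAGE_SIZE,
 *			   PAGE_SHIFT, false);
 *
 * Flushing a range of huge pages would pass PMD_SHIFT as the stride.
 */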

static void do_flush_tlb_all(void *info)
{
	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
	__flush_tlb_all();
}

void flush_tlb_all(void)
{
	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
	on_each_cpu(do_flush_tlb_all, NULL, 1);
}

static void do_kernel_range_flush(void *info)
{
	struct flush_tlb_info *f = info;
	unsigned long addr;

	/* flush range by one by one 'invlpg' */
	for (addr = f->start; addr < f->end; addr += PAGE_SIZE)
		flush_tlb_one_kernel(addr);
}

void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	/* Balance as user space task's flush, a bit conservative */
	if (end == TLB_FLUSH_ALL ||
	    (end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) {
		on_each_cpu(do_flush_tlb_all, NULL, 1);
	} else {
		struct flush_tlb_info *info;

		preempt_disable();
		info = get_flush_tlb_info(NULL, start, end, 0, false, 0);

		on_each_cpu(do_kernel_range_flush, info, 1);

		put_flush_tlb_info();
		preempt_enable();
	}
}
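
/*
 * Back-of-the-envelope check of the ceiling (illustrative numbers only):
 * with 4k pages and the default ceiling of 33, any kernel range wider
 * than 33 pages (132k) takes the full-flush path above rather than 34+
 * individual INVLPGs.
 */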

/*
 * This can be used from process context to figure out what the value of
 * CR3 is without needing to do a (slow) __read_cr3().
 *
 * It's intended to be used for code like KVM that sneakily changes CR3
 * and needs to restore it.  It needs extensive debugging if used from
 * interrupt context.
 */
unsigned long __get_current_cr3_fast(void)
{
	unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
		this_cpu_read(cpu_tlbstate.loaded_mm_asid));

	/* For now, be very restrictive about when this can be called. */
	VM_WARN_ON(in_nmi() || preemptible());

	VM_BUG_ON(cr3 != __read_cr3());
	return cr3;
}
EXPORT_SYMBOL_GPL(__get_current_cr3_fast);

/*
 * Flush one page in the kernel mapping
 */
void flush_tlb_one_kernel(unsigned long addr)
{
	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);

	/*
	 * If PTI is off, then __flush_tlb_one_user() is just INVLPG or its
	 * paravirt equivalent.  Even with PCID, this is sufficient: we only
	 * use PCID if we also use global PTEs for the kernel mapping, and
	 * INVLPG flushes global translations across all address spaces.
	 *
	 * If PTI is on, then the kernel is mapped with non-global PTEs, and
	 * __flush_tlb_one_user() will flush the given address for the current
	 * kernel address space and for its usermode counterpart, but it does
	 * not flush it for other address spaces.
	 */
	flush_tlb_one_user(addr);

	if (!static_cpu_has(X86_FEATURE_PTI))
		return;

	/*
	 * See above.  We need to propagate the flush to all other address
	 * spaces.  In principle, we only need to propagate it to kernelmode
	 * address spaces, but the extra bookkeeping we would need is not
	 * worth it.
	 */
	this_cpu_write(cpu_tlbstate.invalidate_other, true);
}

/*
 * Flush one page in the user mapping
 */
STATIC_NOPV void native_flush_tlb_one_user(unsigned long addr)
{
	u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);

	asm volatile("invlpg (%0)" ::"r" (addr) : "memory");

	if (!static_cpu_has(X86_FEATURE_PTI))
		return;

	/*
	 * Some platforms #GP if we call invpcid(type=1/2) before
	 * CR4.PCIDE=1.  Just use invalidate_user_asid() in case we are
	 * called early.
	 */
	if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE))
		invalidate_user_asid(loaded_mm_asid);
	else
		invpcid_flush_one(user_pcid(loaded_mm_asid), addr);
}

void flush_tlb_one_user(unsigned long addr)
{
	__flush_tlb_one_user(addr);
}

/*
 * Flush everything
 */
STATIC_NOPV void native_flush_tlb_global(void)
{
	unsigned long flags;

	if (static_cpu_has(X86_FEATURE_INVPCID)) {
		/*
		 * Using INVPCID is considerably faster than a pair of writes
		 * to CR4 sandwiched inside an IRQ flag save/restore.
		 *
		 * Note, this works with CR4.PCIDE=0 or 1.
		 */
		invpcid_flush_all();
		return;
	}

	/*
	 * Read-modify-write to CR4 - protect it from preemption and
	 * from interrupts. (Use the raw variant because this code can
	 * be called from deep inside debugging code.)
	 */
	raw_local_irq_save(flags);

	__native_tlb_flush_global(this_cpu_read(cpu_tlbstate.cr4));

	raw_local_irq_restore(flags);
}

/*
 * Flush the entire current user mapping
 */
STATIC_NOPV void native_flush_tlb_local(void)
{
	/*
	 * Preemption or interrupts must be disabled to protect the access
	 * to the per CPU variable and to prevent being preempted between
	 * read_cr3() and write_cr3().
	 */
	WARN_ON_ONCE(preemptible());

	invalidate_user_asid(this_cpu_read(cpu_tlbstate.loaded_mm_asid));

	/* If current->mm == NULL then the read_cr3() "borrows" an mm */
	native_write_cr3(__native_read_cr3());
}

void flush_tlb_local(void)
{
	__flush_tlb_local();
}

/*
 * Flush everything
 */
void __flush_tlb_all(void)
{
	/*
	 * This is to catch users with enabled preemption and the PGE feature
	 * and don't trigger the warning in __native_flush_tlb().
	 */
	VM_WARN_ON_ONCE(preemptible());

	if (boot_cpu_has(X86_FEATURE_PGE)) {
		__flush_tlb_global();
	} else {
		/*
		 * !PGE -> !PCID (setup_pcid()), thus every flush is total.
		 */
		flush_tlb_local();
	}
}
EXPORT_SYMBOL_GPL(__flush_tlb_all);

void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
{
	struct flush_tlb_info *info;

	int cpu = get_cpu();

	info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false, 0);
	/*
	 * flush_tlb_multi() is not optimized for the common case in which
	 * only a local TLB flush is needed. Optimize this use-case by calling
	 * flush_tlb_func() directly in this case.
	 */
	if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) {
		flush_tlb_multi(&batch->cpumask, info);
	} else if (cpumask_test_cpu(cpu, &batch->cpumask)) {
		lockdep_assert_irqs_enabled();
		local_irq_disable();
		flush_tlb_func(info);
		local_irq_enable();
	}

	cpumask_clear(&batch->cpumask);

	put_flush_tlb_info();
	put_cpu();
}

/*
 * Blindly accessing user memory from NMI context can be dangerous
 * if we're in the middle of switching the current user task or
 * switching the loaded mm.  It can also be dangerous if we
 * interrupted some kernel code that was temporarily using a
 * different mm.
 */
bool nmi_uaccess_okay(void)
{
	struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
	struct mm_struct *current_mm = current->mm;

	VM_WARN_ON_ONCE(!loaded_mm);

	/*
	 * The condition we want to check is
	 * current_mm->pgd == __va(read_cr3_pa()).  This may be slow, though,
	 * if we're running in a VM with shadow paging, and nmi_uaccess_okay()
	 * is supposed to be reasonably fast.
	 *
	 * Instead, we check the almost equivalent but somewhat conservative
	 * condition below, and we rely on the fact that switch_mm_irqs_off()
	 * sets loaded_mm to LOADED_MM_SWITCHING before writing to CR3.
	 */
	if (loaded_mm != current_mm)
		return false;

	VM_WARN_ON_ONCE(current_mm->pgd != __va(read_cr3_pa()));

	return true;
}

static ssize_t tlbflush_read_file(struct file *file, char __user *user_buf,
			     size_t count, loff_t *ppos)
{
	char buf[32];
	unsigned int len;

	/* %lu, since tlb_single_page_flush_ceiling is an unsigned long */
	len = sprintf(buf, "%lu\n", tlb_single_page_flush_ceiling);
	return simple_read_from_buffer(user_buf, count, ppos, buf, len);
}

static ssize_t tlbflush_write_file(struct file *file,
		 const char __user *user_buf, size_t count, loff_t *ppos)
{
	char buf[32];
	ssize_t len;
	int ceiling;

	len = min(count, sizeof(buf) - 1);
	if (copy_from_user(buf, user_buf, len))
		return -EFAULT;

	buf[len] = '\0';
	if (kstrtoint(buf, 0, &ceiling))
		return -EINVAL;

	if (ceiling < 0)
		return -EINVAL;

	tlb_single_page_flush_ceiling = ceiling;
	return count;
}

static const struct file_operations fops_tlbflush = {
	.read = tlbflush_read_file,
	.write = tlbflush_write_file,
	.llseek = default_llseek,
};

static int __init create_tlb_single_page_flush_ceiling(void)
{
	debugfs_create_file("tlb_single_page_flush_ceiling", S_IRUSR | S_IWUSR,
			    arch_debugfs_dir, NULL, &fops_tlbflush);
	return 0;
}
late_initcall(create_tlb_single_page_flush_ceiling);