#include <linux/init.h>

#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/export.h>
#include <linux/cpu.h>
#include <linux/debugfs.h>

#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/nospec-branch.h>
#include <asm/cache.h>
#include <asm/apic.h>
#include <asm/uv/uv.h>

/*
 * TLB flushing for x86: per-CPU ASID (PCID) bookkeeping plus local and
 * remote TLB shootdowns on mm switches and page-table updates.
 */

/*
 * We get here when we do something requiring a TLB invalidation
 * but could not go invalidate all of the contexts.  We do the
 * necessary invalidation by clearing out the 'ctx_id' of every other
 * ASID slot, which forces a TLB flush when that context is next loaded.
 */
void clear_asid_other(void)
{
	u16 asid;

	/* invalidate_other should only ever be set when PTI is in use. */
	if (!static_cpu_has(X86_FEATURE_PTI)) {
		WARN_ON_ONCE(1);
		return;
	}

	for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
		/* No need to flush the currently loaded ASID. */
		if (asid == this_cpu_read(cpu_tlbstate.loaded_mm_asid))
			continue;
		/*
		 * Make sure the next time we go to switch to
		 * this asid, we do a flush:
		 */
		this_cpu_write(cpu_tlbstate.ctxs[asid].ctx_id, 0);
	}
	this_cpu_write(cpu_tlbstate.invalidate_other, false);
}
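
/*
 * A sketch of why clearing ctx_id is enough (editorial note): real mms get
 * nonzero ctx_ids (last_mm_ctx_id below starts at 1), so a zeroed slot can
 * never match next->context.ctx_id in choose_new_asid() and is treated as
 * free, which sets need_flush when the slot is reused.
 */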

atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);

/*
 * Pick an ASID slot for 'next': reuse this CPU's existing slot if it already
 * caches the mm's context, otherwise hand out the next dynamic ASID
 * round-robin and request a flush of the recycled slot.
 */
static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
			    u16 *new_asid, bool *need_flush)
{
	u16 asid;

	if (!static_cpu_has(X86_FEATURE_PCID)) {
		/* Without PCID there is only one context; it always needs a flush. */
		*new_asid = 0;
		*need_flush = true;
		return;
	}

	if (this_cpu_read(cpu_tlbstate.invalidate_other))
		clear_asid_other();

	for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
		if (this_cpu_read(cpu_tlbstate.ctxs[asid].ctx_id) !=
		    next->context.ctx_id)
			continue;

		*new_asid = asid;
		*need_flush = (this_cpu_read(cpu_tlbstate.ctxs[asid].tlb_gen) <
			       next_tlb_gen);
		return;
	}

	/*
	 * We don't currently own an ASID slot on this CPU.
	 * Allocate a slot round-robin.
	 */
	*new_asid = this_cpu_add_return(cpu_tlbstate.next_asid, 1) - 1;
	if (*new_asid >= TLB_NR_DYN_ASIDS) {
		*new_asid = 0;
		this_cpu_write(cpu_tlbstate.next_asid, 1);
	}
	*need_flush = true;
}
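
/*
 * Illustrative sketch of the allocation path above (assuming
 * TLB_NR_DYN_ASIDS == 6): if next_asid is currently 6, this_cpu_add_return()
 * returns 7, so *new_asid = 6; that is >= TLB_NR_DYN_ASIDS, so we wrap to
 * *new_asid = 0 and reset next_asid to 1, i.e. slot 0 is recycled and must
 * be flushed.
 */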

static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush)
{
	unsigned long new_mm_cr3;

	if (need_flush) {
		invalidate_user_asid(new_asid);
		new_mm_cr3 = build_cr3(pgdir, new_asid);
	} else {
		new_mm_cr3 = build_cr3_noflush(pgdir, new_asid);
	}

	/*
	 * Caution: many callers of this function expect
	 * that load_cr3() is serializing and orders TLB
	 * fills with respect to the mm_cpumask writes.
	 */
	write_cr3(new_mm_cr3);
}
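
/*
 * For reference (a sketch, not a definition): with PCID enabled, the CR3
 * value built above carries the page-table root in the address bits and the
 * kernel PCID derived from the ASID in the low bits; build_cr3_noflush()
 * additionally sets the CR3 no-flush bit (bit 63), so the switch preserves
 * the TLB entries tagged with that PCID.
 */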

void leave_mm(int cpu)
{
	struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);

	/*
	 * It's plausible that we're in lazy TLB mode while our mm is init_mm.
	 * If so, our callers still expect us to flush the TLB, but there
	 * aren't any user TLB entries in init_mm to worry about, so leaving
	 * init_mm loaded is always safe.
	 */
	if (loaded_mm == &init_mm)
		return;

	/* Warn if we're not lazy. */
	WARN_ON(!this_cpu_read(cpu_tlbstate.is_lazy));

	switch_mm(NULL, &init_mm, NULL);
}
EXPORT_SYMBOL_GPL(leave_mm);

void switch_mm(struct mm_struct *prev, struct mm_struct *next,
	       struct task_struct *tsk)
{
	unsigned long flags;

	local_irq_save(flags);
	switch_mm_irqs_off(prev, next, tsk);
	local_irq_restore(flags);
}

static void sync_current_stack_to_mm(struct mm_struct *mm)
{
	unsigned long sp = current_stack_pointer;
	pgd_t *pgd = pgd_offset(mm, sp);

	if (pgtable_l5_enabled()) {
		if (unlikely(pgd_none(*pgd))) {
			pgd_t *pgd_ref = pgd_offset_k(sp);

			set_pgd(pgd, *pgd_ref);
		}
	} else {
		/*
		 * With 4-level paging, "pgd" is faked: the top-level entries
		 * are really folded "p4d"s, so sync at the p4d level.  This
		 * compiles to approximately the same code as the 5-level case.
		 */
		p4d_t *p4d = p4d_offset(pgd, sp);

		if (unlikely(p4d_none(*p4d))) {
			pgd_t *pgd_ref = pgd_offset_k(sp);
			p4d_t *p4d_ref = p4d_offset(pgd_ref, sp);

			set_p4d(p4d, *p4d_ref);
		}
	}
}

void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
			struct task_struct *tsk)
{
	struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
	u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
	unsigned cpu = smp_processor_id();
	u64 next_tlb_gen;

	/*
	 * NB: The scheduler will call us with prev == next when switching
	 * from lazy TLB mode to normal mode if active_mm isn't changing.
	 * When this happens, we don't assume that CR3 (and hence
	 * cpu_tlbstate.loaded_mm) matches next.
	 *
	 * NB: leave_mm() calls us with prev == NULL and tsk == NULL.
	 */

	/* We don't want flush_tlb_func_* to run concurrently with us. */
	if (IS_ENABLED(CONFIG_PROVE_LOCKING))
		WARN_ON_ONCE(!irqs_disabled());

	/*
	 * Verify that CR3 is what we think it is.  This will catch
	 * hypothetical buggy code that directly switches to swapper_pg_dir
	 * without going through leave_mm() / switch_mm_irqs_off() or that
	 * does something like write_cr3(read_cr3_pa()).
	 *
	 * Only do this check if CONFIG_DEBUG_VM=y because __read_cr3()
	 * isn't free.
	 */
#ifdef CONFIG_DEBUG_VM
	if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) {
		/*
		 * If we were to BUG here, we'd be very likely to kill
		 * the system so hard that we don't see the call trace.
		 * Try to recover instead by ignoring the error and doing
		 * a global flush to minimize the chance of corruption.
		 *
		 * (This is far from being a fully correct recovery.
		 *  Architecturally, the CPU could prefetch something
		 *  back into an incorrect ASID slot and leave it there
		 *  to cause trouble later.  It's better than nothing,
		 *  though.)
		 */
		__flush_tlb_all();
	}
#endif
	this_cpu_write(cpu_tlbstate.is_lazy, false);

	/*
	 * The membarrier system call requires a full memory barrier and
	 * core serialization before returning to user-space, after
	 * storing to rq->curr.  Writing to CR3 provides that full
	 * memory barrier and core serializing instruction.
	 */
	if (real_prev == next) {
		VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
			   next->context.ctx_id);

		/*
		 * We don't currently support having a real mm loaded without
		 * our cpu set in mm_cpumask().  We have all the bookkeeping
		 * to make it work, but there's no real advantage to doing so.
		 * For now, warn and repair the cpumask if the bit is clear.
		 */
		if (WARN_ON_ONCE(real_prev != &init_mm &&
				 !cpumask_test_cpu(cpu, mm_cpumask(next))))
			cpumask_set_cpu(cpu, mm_cpumask(next));

		return;
	} else {
		u16 new_asid;
		bool need_flush;
		u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id);

		/*
		 * Avoid user/user BTB poisoning by flushing the branch
		 * predictor when switching between processes.  This stops
		 * one process from doing Spectre-v2 attacks on another.
		 *
		 * As an optimization, flush indirect branches only when
		 * switching into processes that disable dumping.  This
		 * protects high value processes like gpg, without having
		 * too high performance overhead.  IBPB is *expensive*!
		 *
		 * This will not flush branches when switching into kernel
		 * threads.  It will also not flush if we switch to the idle
		 * thread and back to the same process.  It will flush if we
		 * switch to a different non-dumpable process.
		 */
		if (tsk && tsk->mm &&
		    tsk->mm->context.ctx_id != last_ctx_id &&
		    get_dumpable(tsk->mm) != SUID_DUMP_USER)
			indirect_branch_prediction_barrier();

		if (IS_ENABLED(CONFIG_VMAP_STACK)) {
			/*
			 * If our current stack is in vmalloc space and isn't
			 * mapped in the new pgd, we'll double-fault.  Fix it.
			 */
			sync_current_stack_to_mm(next);
		}

		/* Stop remote flushes for the previous mm. */
		VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(real_prev)) &&
				real_prev != &init_mm);
		cpumask_clear_cpu(cpu, mm_cpumask(real_prev));

		/*
		 * Start remote flushes and then read tlb_gen.
		 */
		cpumask_set_cpu(cpu, mm_cpumask(next));
		next_tlb_gen = atomic64_read(&next->context.tlb_gen);

		choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);

		if (need_flush) {
			this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
			this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
			load_new_mm_cr3(next->pgd, new_asid, true);

			/*
			 * NB: This gets called via leave_mm() in the idle path
			 * where RCU functions differently.  Tracing normally
			 * uses RCU, so we have to use the _rcuidle variant.
			 *
			 * (There is no good reason for this.  The idle code
			 *  should be rearranged to call this before
			 *  rcu_idle_enter().)
			 */
			trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
		} else {
			/* The new ASID is already up to date. */
			load_new_mm_cr3(next->pgd, new_asid, false);

			/* See above wrt _rcuidle. */
			trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
		}

		/*
		 * Record the last user mm's context id, so we can avoid
		 * flushing the branch buffer with IBPB if we switch back
		 * to the same user.
		 */
		if (next != &init_mm)
			this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id);

		this_cpu_write(cpu_tlbstate.loaded_mm, next);
		this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
	}

	load_mm_cr4(next);
	switch_ldt(real_prev, next);
}
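
/*
 * A rough sketch of the fast path above, for orientation (assuming PCID is
 * available and 'next' already has an up-to-date ASID slot on this CPU):
 * the scheduler calls switch_mm_irqs_off(), choose_new_asid() finds the
 * matching slot with need_flush == false, and load_new_mm_cr3() writes CR3
 * with the no-flush bit set, so no TLB entries are discarded at all.
 */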

/*
 * Please ignore the name of this function.  It should be called
 * switch_to_kernel_thread().
 *
 * enter_lazy_tlb() is a hint from the scheduler that we are entering a
 * kernel thread or other context without an mm.  Acceptable implementations
 * include doing nothing whatsoever, switching to init_mm, or various clever
 * lazy tricks to try to minimize TLB flushes.
 *
 * The scheduler reserves the right to call enter_lazy_tlb() several times
 * in a row.  It will notify us that we're going back to a real mm by
 * calling switch_mm_irqs_off().
 */
void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
	if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm)
		return;

	if (tlb_defer_switch_to_init_mm()) {
		/*
		 * There's a significant optimization that may be possible
		 * here.  We have accurate enough TLB flush tracking that we
		 * don't need to maintain coherence of the TLB per se when
		 * we're lazy.  We do, however, need to maintain coherence of
		 * paging-structure caches.  We could, in principle, leave our
		 * old mm loaded and only switch to init_mm when
		 * tlb_remove_page() happens.
		 */
		this_cpu_write(cpu_tlbstate.is_lazy, true);
	} else {
		switch_mm(NULL, &init_mm, NULL);
	}
}

/*
 * Call this when reinitializing a CPU.  It fixes the following potential
 * problems:
 *
 * - The ASID changed from what cpu_tlbstate thinks it is (most likely
 *   because the CPU was taken down and came back up with CR3's PCID
 *   bits clear).  CPU hotplug can do this.
 *
 * - The TLB contains junk in slots corresponding to inactive ASIDs.
 *
 * - The CPU went so far out to lunch that it may have missed a TLB
 *   flush.
 */
void initialize_tlbstate_and_flush(void)
{
	int i;
	struct mm_struct *mm = this_cpu_read(cpu_tlbstate.loaded_mm);
	u64 tlb_gen = atomic64_read(&init_mm.context.tlb_gen);
	unsigned long cr3 = __read_cr3();

	/* Assert that CR3 already references the right mm. */
	WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd));

	/*
	 * Assert that CR4.PCIDE is set if needed: the ASID handling below
	 * assumes that PCIDs are enabled whenever the CPU supports them.
	 */
	WARN_ON(boot_cpu_has(X86_FEATURE_PCID) &&
		!(cr4_read_shadow() & X86_CR4_PCIDE));

	/* Force ASID 0 and force a TLB flush. */
	write_cr3(build_cr3(mm->pgd, 0));

	/* Reinitialize tlbstate. */
	this_cpu_write(cpu_tlbstate.last_ctx_id, mm->context.ctx_id);
	this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
	this_cpu_write(cpu_tlbstate.next_asid, 1);
	this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
	this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, tlb_gen);

	for (i = 1; i < TLB_NR_DYN_ASIDS; i++)
		this_cpu_write(cpu_tlbstate.ctxs[i].ctx_id, 0);
}

/*
 * flush_tlb_func_common()'s memory ordering requirement is that any
 * TLB fills that happen after we flush the TLB are ordered after we
 * read active_mm's tlb_gen.  We don't need any explicit barriers
 * because all x86 flush operations are serializing.
 */
static void flush_tlb_func_common(const struct flush_tlb_info *f,
				  bool local, enum tlb_flush_reason reason)
{
	/*
	 * We have three different tlb_gen values in here.  They are:
	 *
	 * - mm_tlb_gen:     the latest generation.
	 * - local_tlb_gen:  the generation that this CPU has already caught
	 *                   up to.
	 * - f->new_tlb_gen: the generation that the requester of the flush
	 *                   wants us to catch up to.
	 */
	struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
	u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
	u64 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);
	u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen);

	/* This code cannot presently handle being reentered. */
	VM_WARN_ON(!irqs_disabled());

	if (unlikely(loaded_mm == &init_mm))
		return;

	VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) !=
		   loaded_mm->context.ctx_id);

	if (this_cpu_read(cpu_tlbstate.is_lazy)) {
		/*
		 * We're in lazy mode.  We need to at least flush our
		 * paging-structure cache to avoid speculatively reading
		 * garbage into our TLB.  Since switching to init_mm is barely
		 * slower than a minimal flush, just switch to init_mm.
		 */
		switch_mm_irqs_off(NULL, &init_mm, NULL);
		return;
	}

	if (unlikely(local_tlb_gen == mm_tlb_gen)) {
		/*
		 * There's nothing to do: we're already up to date.  This can
		 * happen if two concurrent flushes happen -- the first flush
		 * to be handled can catch us all the way up, leaving no work
		 * for the second flush.
		 */
		trace_tlb_flush(reason, 0);
		return;
	}

	WARN_ON_ONCE(local_tlb_gen > mm_tlb_gen);
	WARN_ON_ONCE(f->new_tlb_gen > mm_tlb_gen);

	/*
	 * If we get to this point, we know that our TLB is out of date.
	 * This does not strictly imply that we need to flush (it's
	 * possible that f->new_tlb_gen <= local_tlb_gen), but we're
	 * going to need to flush in the very near future, so we might
	 * as well get it over with.
	 *
	 * The only question is whether to do a full or partial flush.
	 *
	 * We do a partial flush if requested and two extra conditions
	 * are met:
	 *
	 * 1. f->new_tlb_gen == local_tlb_gen + 1.  We have an invariant that
	 *    we've always done all needed flushes to catch up to
	 *    local_tlb_gen.  If, for example, local_tlb_gen == 2 and
	 *    f->new_tlb_gen == 3, then we know that the flush needed to bring
	 *    us up to date for tlb_gen 3 is the partial flush we're
	 *    processing.
	 *
	 *    As an example of why this check is needed, suppose that there
	 *    are two concurrent flushes.  The first is a full flush that
	 *    changes context.tlb_gen from 1 to 2.  The second is a partial
	 *    flush that changes context.tlb_gen from 2 to 3.  If they get
	 *    processed on this CPU in reverse order, we'll see
	 *    local_tlb_gen == 1, mm_tlb_gen == 3, and end != TLB_FLUSH_ALL.
	 *    If we were to use __flush_tlb_one_user() and set local_tlb_gen
	 *    to 3, we'd be in trouble: we'd have skipped the full flush that
	 *    tlb_gen 2 requires even though local_tlb_gen claims otherwise.
	 *
	 * 2. f->new_tlb_gen == mm_tlb_gen.  This is purely an optimization.
	 *    Partial TLB flushes are not all that much cheaper than full TLB
	 *    flushes, so it seems unlikely that it would be a performance win
	 *    to do a partial flush if that won't bring our TLB fully up to
	 *    date.  By doing a full flush instead, we can increase
	 *    local_tlb_gen all the way to mm_tlb_gen and we can probably
	 *    avoid another flush in the very near future.
	 */
	if (f->end != TLB_FLUSH_ALL &&
	    f->new_tlb_gen == local_tlb_gen + 1 &&
	    f->new_tlb_gen == mm_tlb_gen) {
		/* Partial flush */
		unsigned long addr;
		unsigned long nr_pages = (f->end - f->start) >> PAGE_SHIFT;

		addr = f->start;
		while (addr < f->end) {
			__flush_tlb_one_user(addr);
			addr += PAGE_SIZE;
		}
		if (local)
			count_vm_tlb_events(NR_TLB_LOCAL_FLUSH_ONE, nr_pages);
		trace_tlb_flush(reason, nr_pages);
	} else {
		/* Full flush. */
		local_flush_tlb();
		if (local)
			count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
		trace_tlb_flush(reason, TLB_FLUSH_ALL);
	}

	/* Both paths above have now caught the TLB up to mm_tlb_gen. */
	this_cpu_write(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen, mm_tlb_gen);
}
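
/*
 * Worked example of the generation tracking above (illustrative numbers):
 * suppose local_tlb_gen == 5 and a munmap() bumps the mm to mm_tlb_gen == 6
 * with f->new_tlb_gen == 6 and a small f->start..f->end range.  Then
 * 6 == 5 + 1 and 6 == mm_tlb_gen, so only that range is flushed with
 * __flush_tlb_one_user().  If a second, full flush had already raised
 * mm_tlb_gen to 7, the conditions fail and we fall back to a full flush.
 */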

static void flush_tlb_func_local(void *info, enum tlb_flush_reason reason)
{
	const struct flush_tlb_info *f = info;

	flush_tlb_func_common(f, true, reason);
}

static void flush_tlb_func_remote(void *info)
{
	const struct flush_tlb_info *f = info;

	inc_irq_stat(irq_tlb_count);

	if (f->mm && f->mm != this_cpu_read(cpu_tlbstate.loaded_mm))
		return;

	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
	flush_tlb_func_common(f, false, TLB_REMOTE_SHOOTDOWN);
}

void native_flush_tlb_others(const struct cpumask *cpumask,
			     const struct flush_tlb_info *info)
{
	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
	if (info->end == TLB_FLUSH_ALL)
		trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL);
	else
		trace_tlb_flush(TLB_REMOTE_SEND_IPI,
				(info->end - info->start) >> PAGE_SHIFT);

	if (is_uv_system()) {
		/*
		 * This whole special case is confused.  UV has a "Broadcast
		 * Assist Unit", which seems to be a fancy way to send IPIs.
		 * Back when x86 used an explicit TLB flush IPI, UV was
		 * optimized to use its own mechanism.  These days, x86 uses
		 * smp_call_function_many(), but UV still uses a manual IPI,
		 * and that IPI's action is out of date -- it does a manual
		 * flush instead of calling flush_tlb_func_remote().  This
		 * means that the percpu tlb_gen variables won't be updated
		 * and we'll do pointless flushes on future context switches.
		 *
		 * Rather than hooking native_flush_tlb_others() here, it
		 * would be better to update UV so that
		 * smp_call_function_many() etc. are optimal on UV.
		 */
		unsigned int cpu;

		cpu = smp_processor_id();
		cpumask = uv_flush_tlb_others(cpumask, info);
		if (cpumask)
			smp_call_function_many(cpumask, flush_tlb_func_remote,
					       (void *)info, 1);
		return;
	}
	smp_call_function_many(cpumask, flush_tlb_func_remote,
			       (void *)info, 1);
}

/*
 * See Documentation/x86/tlb.txt for details.  We choose 33
 * because it is large enough to cover the vast majority (at
 * least 95%) of allocations, and is small enough that we are
 * confident it will not cause too much overhead.  Each single
 * flush is about 100 ns, so this caps the maximum overhead at
 * _about_ 3,000 ns.
 *
 * This is in units of pages.
 */
static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;

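/*
 * Illustrative arithmetic (not from the code): with the default ceiling of
 * 33 pages and 4 KiB pages, unmapping 128 KiB (32 pages) stays under the
 * ceiling and is flushed page by page below, while unmapping 1 MiB
 * (256 pages) exceeds it and triggers a full TLB flush instead.
 */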

void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
			unsigned long end, unsigned long vmflag)
{
	int cpu;

	struct flush_tlb_info info __aligned(SMP_CACHE_BYTES) = {
		.mm = mm,
	};

	cpu = get_cpu();

	/* This is also a barrier that synchronizes with switch_mm(). */
	info.new_tlb_gen = inc_mm_tlb_gen(mm);

	/* Should we flush just the requested range? */
	if ((end != TLB_FLUSH_ALL) &&
	    !(vmflag & VM_HUGETLB) &&
	    ((end - start) >> PAGE_SHIFT) <= tlb_single_page_flush_ceiling) {
		info.start = start;
		info.end = end;
	} else {
		info.start = 0UL;
		info.end = TLB_FLUSH_ALL;
	}

	if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) {
		VM_WARN_ON(irqs_disabled());
		local_irq_disable();
		flush_tlb_func_local(&info, TLB_LOCAL_MM_SHOOTDOWN);
		local_irq_enable();
	}

	if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids)
		flush_tlb_others(mm_cpumask(mm), &info);

	put_cpu();
}

static void do_flush_tlb_all(void *info)
{
	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
	__flush_tlb_all();
}

void flush_tlb_all(void)
{
	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
	on_each_cpu(do_flush_tlb_all, NULL, 1);
}

static void do_kernel_range_flush(void *info)
{
	struct flush_tlb_info *f = info;
	unsigned long addr;

	/* Flush the range one page at a time with INVLPG. */
	for (addr = f->start; addr < f->end; addr += PAGE_SIZE)
		__flush_tlb_one_kernel(addr);
}

void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	/* Use the same per-page flush ceiling as user-space flushes, a bit conservatively. */
	if (end == TLB_FLUSH_ALL ||
	    (end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) {
		on_each_cpu(do_flush_tlb_all, NULL, 1);
	} else {
		struct flush_tlb_info info;

		info.start = start;
		info.end = end;
		on_each_cpu(do_kernel_range_flush, &info, 1);
	}
}

void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
{
	struct flush_tlb_info info = {
		.mm = NULL,
		.start = 0UL,
		.end = TLB_FLUSH_ALL,
	};

	int cpu = get_cpu();

	if (cpumask_test_cpu(cpu, &batch->cpumask)) {
		VM_WARN_ON(irqs_disabled());
		local_irq_disable();
		flush_tlb_func_local(&info, TLB_LOCAL_SHOOTDOWN);
		local_irq_enable();
	}

	if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids)
		flush_tlb_others(&batch->cpumask, &info);

	cpumask_clear(&batch->cpumask);

	put_cpu();
}

static ssize_t tlbflush_read_file(struct file *file, char __user *user_buf,
				  size_t count, loff_t *ppos)
{
	char buf[32];
	unsigned int len;

	len = sprintf(buf, "%lu\n", tlb_single_page_flush_ceiling);
	return simple_read_from_buffer(user_buf, count, ppos, buf, len);
}

static ssize_t tlbflush_write_file(struct file *file,
		 const char __user *user_buf, size_t count, loff_t *ppos)
{
	char buf[32];
	ssize_t len;
	int ceiling;

	len = min(count, sizeof(buf) - 1);
	if (copy_from_user(buf, user_buf, len))
		return -EFAULT;

	buf[len] = '\0';
	if (kstrtoint(buf, 0, &ceiling))
		return -EINVAL;

	if (ceiling < 0)
		return -EINVAL;

	tlb_single_page_flush_ceiling = ceiling;
	return count;
}

static const struct file_operations fops_tlbflush = {
	.read = tlbflush_read_file,
	.write = tlbflush_write_file,
	.llseek = default_llseek,
};

static int __init create_tlb_single_page_flush_ceiling(void)
{
	debugfs_create_file("tlb_single_page_flush_ceiling", S_IRUSR | S_IWUSR,
			    arch_debugfs_dir, NULL, &fops_tlbflush);
	return 0;
}
late_initcall(create_tlb_single_page_flush_ceiling);
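
/*
 * Example usage of the knob above (illustrative; the debugfs mount point
 * may differ on a given system):
 *
 *   # cat /sys/kernel/debug/x86/tlb_single_page_flush_ceiling
 *   33
 *   # echo 64 > /sys/kernel/debug/x86/tlb_single_page_flush_ceiling
 */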