1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21#include <linux/errno.h>
22#include <linux/module.h>
23#include <linux/efi.h>
24#include <linux/bcd.h>
25#include <linux/highmem.h>
26
27#include <asm/bug.h>
28#include <asm/paravirt.h>
29#include <asm/debugreg.h>
30#include <asm/desc.h>
31#include <asm/setup.h>
32#include <asm/pgtable.h>
33#include <asm/time.h>
34#include <asm/pgalloc.h>
35#include <asm/irq.h>
36#include <asm/delay.h>
37#include <asm/fixmap.h>
38#include <asm/apic.h>
39#include <asm/tlbflush.h>
40#include <asm/timer.h>
41#include <asm/special_insns.h>
42
43
44
45
46
/*
 * Default "do nothing" pv op.  Written in asm (in .entry.text) rather than
 * as a C function so it has a unique, stable address: paravirt_patch_default()
 * compares op pointers against _paravirt_nop and patches such call sites
 * into nothing at all (see paravirt_patch_nop()).
 */
extern void _paravirt_nop(void);
asm (".pushsection .entry.text, \"ax\"\n"
     ".global _paravirt_nop\n"
     "_paravirt_nop:\n\t"
     "ret\n\t"
     ".size _paravirt_nop, . - _paravirt_nop\n\t"
     ".type _paravirt_nop, @function\n\t"
     ".popsection");
55
56
/*
 * Identity op: returns its argument unchanged.  Used for pv ops that are
 * pure value conversions on bare hardware (e.g. pte_val/make_pte via
 * PTE_IDENT below).  Kept out of line and notrace so its address can be
 * recognized by paravirt_patch_default() and patched to inline identity
 * code via paravirt_patch_ident_32().
 */
u32 notrace _paravirt_ident_32(u32 x)
{
	return x;
}
61
/* 64-bit identity op; see _paravirt_ident_32 for why this is out of line. */
u64 notrace _paravirt_ident_64(u64 x)
{
	return x;
}
66
67void __init default_banner(void)
68{
69 printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
70 pv_info.name);
71}
72
73
/*
 * DEF_NATIVE emits the native instruction sequence for an op as raw bytes,
 * bracketed by start_<ops>_<name>/end_<ops>_<name> symbols so it can be used
 * as a patch source by paravirt_patch_insns().
 */
#define DEF_NATIVE(ops, name, code)					\
	extern const char start_##ops##_##name[], end_##ops##_##name[];	\
	asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":")

/* ud2 - patched over call sites whose op is NULL so they trap if reached. */
static const unsigned char ud2a[] = { 0x0f, 0x0b };
80
/* Nop the call site: emit a zero-length replacement. */
unsigned paravirt_patch_nop(void)
{
	return 0;
}
85
/* Leave the call site untouched by "consuming" all len original bytes. */
unsigned paravirt_patch_ignore(unsigned len)
{
	return len;
}
90
/*
 * Layout of an x86 near call/jmp: one opcode byte (0xe8/0xe9) followed by
 * a 32-bit displacement relative to the end of the instruction.  Packed so
 * sizeof(struct branch) == 5, matching the instruction encoding.
 */
struct branch {
	unsigned char opcode;
	u32 delta;
} __attribute__((packed));
95
96unsigned paravirt_patch_call(void *insnbuf,
97 const void *target, u16 tgt_clobbers,
98 unsigned long addr, u16 site_clobbers,
99 unsigned len)
100{
101 struct branch *b = insnbuf;
102 unsigned long delta = (unsigned long)target - (addr+5);
103
104 if (tgt_clobbers & ~site_clobbers)
105 return len;
106 if (len < 5)
107 return len;
108
109 b->opcode = 0xe8;
110 b->delta = delta;
111 BUILD_BUG_ON(sizeof(*b) != 5);
112
113 return 5;
114}
115
116unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
117 unsigned long addr, unsigned len)
118{
119 struct branch *b = insnbuf;
120 unsigned long delta = (unsigned long)target - (addr+5);
121
122 if (len < 5)
123 return len;
124
125 b->opcode = 0xe9;
126 b->delta = delta;
127
128 return 5;
129}
130
131
132
133static void *get_call_destination(u8 type)
134{
135 struct paravirt_patch_template tmpl = {
136 .pv_init_ops = pv_init_ops,
137 .pv_time_ops = pv_time_ops,
138 .pv_cpu_ops = pv_cpu_ops,
139 .pv_irq_ops = pv_irq_ops,
140 .pv_apic_ops = pv_apic_ops,
141 .pv_mmu_ops = pv_mmu_ops,
142#ifdef CONFIG_PARAVIRT_SPINLOCKS
143 .pv_lock_ops = pv_lock_ops,
144#endif
145 };
146 return *((void **)&tmpl + type);
147}
148
/*
 * Default patcher: inspect the function pointer currently installed for
 * patch site @type and decide what to write over the indirect-call stub
 * at @addr.  Returns the number of replacement bytes produced.
 */
unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
				unsigned long addr, unsigned len)
{
	void *opfunc = get_call_destination(type);
	unsigned ret;

	if (opfunc == NULL)
		/* No op installed: patch in ud2a so reaching it traps. */
		ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a));
	else if (opfunc == _paravirt_nop)
		/* Op is a nop: remove the call site entirely. */
		ret = paravirt_patch_nop();

	/* Identity ops get replaced with inline identity code. */
	else if (opfunc == _paravirt_ident_32)
		ret = paravirt_patch_ident_32(insnbuf, len);
	else if (opfunc == _paravirt_ident_64)
		ret = paravirt_patch_ident_64(insnbuf, len);

	else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
		 type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) ||
		 type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) ||
		 type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64))
		/* Kernel-exit ops never return, so jmp rather than call. */
		ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len);
	else
		/* Everything else becomes a direct call; assume the
		 * target may clobber any register the site allows. */
		ret = paravirt_patch_call(insnbuf, opfunc, CLBR_ANY,
					  addr, clobbers, len);

	return ret;
}
182
/*
 * Copy the replacement instruction bytes [start, end) into @insnbuf.
 * If there is no replacement, or it doesn't fit in @len bytes, copy
 * nothing and return @len ("leave the original site untouched").
 * Returns the number of bytes the caller should consider patched.
 */
unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
			      const char *start, const char *end)
{
	unsigned insn_len = end - start;

	/* Missing or oversized replacement: patch nothing. */
	if (start == NULL || insn_len > len)
		return len;

	memcpy(insnbuf, start, insn_len);
	return insn_len;
}
195
/* pv-op wrapper: flush this CPU's non-global TLB entries. */
static void native_flush_tlb(void)
{
	__native_flush_tlb();
}
200
201
202
203
204
/* pv-op wrapper: flush this CPU's TLB including global entries. */
static void native_flush_tlb_global(void)
{
	__native_flush_tlb_global();
}
209
/* pv-op wrapper: flush the TLB entry for one page at @addr. */
static void native_flush_tlb_single(unsigned long addr)
{
	__native_flush_tlb_single(addr);
}
214
/* Enabled by hypervisor backends that provide steal-time accounting. */
struct static_key paravirt_steal_enabled;
struct static_key paravirt_steal_rq_enabled;
217
/* On bare hardware no time is stolen from the kernel, so always 0. */
static u64 native_steal_clock(int cpu)
{
	return 0;
}
222
223
/* Native kernel-exit paths (defined in entry assembly); used as jmp
 * targets by paravirt_patch_default() and in pv_cpu_ops below. */
extern void native_iret(void);
extern void native_irq_enable_sysexit(void);
extern void native_usergs_sysret32(void);
extern void native_usergs_sysret64(void);
228
/* Resource covering the whole port-IO space, claimed by
 * paravirt_disable_iospace() below. */
static struct resource reserve_ioports = {
	.start = 0,
	.end = IO_SPACE_LIMIT,
	.name = "paravirt-ioport",
	.flags = IORESOURCE_IO | IORESOURCE_BUSY,
};
235
236
237
238
239
240
241
242
/*
 * Reserve the entire legacy port-IO space so no driver can claim ports;
 * returns the request_resource() result (0 on success, negative errno
 * style on conflict).
 */
int paravirt_disable_iospace(void)
{
	return request_resource(&ioport_resource, &reserve_ioports);
}
247
/* Per-CPU record of which lazy batching mode (if any) is active. */
static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LAZY_NONE;
249
/* Enter lazy mode @mode on this CPU; nesting lazy modes is a bug. */
static inline void enter_lazy(enum paravirt_lazy_mode mode)
{
	BUG_ON(this_cpu_read(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE);

	this_cpu_write(paravirt_lazy_mode, mode);
}
256
/* Leave lazy mode @mode; it must match the mode currently entered. */
static void leave_lazy(enum paravirt_lazy_mode mode)
{
	BUG_ON(this_cpu_read(paravirt_lazy_mode) != mode);

	this_cpu_write(paravirt_lazy_mode, PARAVIRT_LAZY_NONE);
}
263
/* Begin batching MMU updates on this CPU. */
void paravirt_enter_lazy_mmu(void)
{
	enter_lazy(PARAVIRT_LAZY_MMU);
}
268
/* Stop batching MMU updates on this CPU. */
void paravirt_leave_lazy_mmu(void)
{
	leave_lazy(PARAVIRT_LAZY_MMU);
}
273
/*
 * Push out any pending batched MMU updates without permanently leaving
 * lazy mode, by bouncing out of and straight back into it.  Preemption
 * is disabled so the lazy-mode check and the bounce stay on one CPU.
 */
void paravirt_flush_lazy_mmu(void)
{
	preempt_disable();

	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
		arch_leave_lazy_mmu_mode();
		arch_enter_lazy_mmu_mode();
	}

	preempt_enable();
}
285
/*
 * Called at the start of a context switch.  If @prev was in lazy MMU
 * mode, flush it now and flag the task so paravirt_end_context_switch()
 * can restore lazy MMU mode after the switch; then enter lazy CPU mode
 * for the duration of the switch itself.
 */
void paravirt_start_context_switch(struct task_struct *prev)
{
	BUG_ON(preemptible());

	if (this_cpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) {
		arch_leave_lazy_mmu_mode();
		set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES);
	}
	enter_lazy(PARAVIRT_LAZY_CPU);
}
296
/*
 * Called at the end of a context switch: leave lazy CPU mode, and if
 * @next had its lazy MMU mode interrupted by the switch (flag set in
 * paravirt_start_context_switch()), re-enter lazy MMU mode for it.
 */
void paravirt_end_context_switch(struct task_struct *next)
{
	BUG_ON(preemptible());

	leave_lazy(PARAVIRT_LAZY_CPU);

	if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES))
		arch_enter_lazy_mmu_mode();
}
306
307enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
308{
309 if (in_interrupt())
310 return PARAVIRT_LAZY_NONE;
311
312 return this_cpu_read(paravirt_lazy_mode);
313}
314
/* Description of the platform we're running on: native defaults, which
 * hypervisor backends override during early boot. */
struct pv_info pv_info = {
	.name = "bare hardware",
	.paravirt_enabled = 0,
	.kernel_rpl = 0,
	.shared_kernel_pmd = 1,

#ifdef CONFIG_X86_64
	.extra_user_64bit_cs = __USER_CS,
#endif
};
325
/* Patching op: native_patch knows the native replacement sequences. */
struct pv_init_ops pv_init_ops = {
	.patch = native_patch,
};
329
/* Native timekeeping ops; steal clock reports zero on bare hardware. */
struct pv_time_ops pv_time_ops = {
	.sched_clock = native_sched_clock,
	.steal_clock = native_steal_clock,
};
334
/* Native interrupt-flag manipulation (callee-save wrappers keep these
 * cheap to call from the pv-op thunks). */
struct pv_irq_ops pv_irq_ops = {
	.save_fl = __PV_IS_CALLEE_SAVE(native_save_fl),
	.restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl),
	.irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable),
	.irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable),
	.safe_halt = native_safe_halt,
	.halt = native_halt,
#ifdef CONFIG_X86_64
	.adjust_exception_frame = paravirt_nop,
#endif
};
346
/* Native CPU state ops: control registers, MSRs, descriptor tables and
 * kernel entry/exit.  Ops a hypervisor need not intercept on bare
 * hardware are paravirt_nop. */
struct pv_cpu_ops pv_cpu_ops = {
	.cpuid = native_cpuid,
	.get_debugreg = native_get_debugreg,
	.set_debugreg = native_set_debugreg,
	.clts = native_clts,
	.read_cr0 = native_read_cr0,
	.write_cr0 = native_write_cr0,
	.read_cr4 = native_read_cr4,
	.read_cr4_safe = native_read_cr4_safe,
	.write_cr4 = native_write_cr4,
#ifdef CONFIG_X86_64
	.read_cr8 = native_read_cr8,
	.write_cr8 = native_write_cr8,
#endif
	.wbinvd = native_wbinvd,
	/* _safe variants: fault-tolerant MSR access is the default. */
	.read_msr = native_read_msr_safe,
	.write_msr = native_write_msr_safe,
	.read_tsc = native_read_tsc,
	.read_pmc = native_read_pmc,
	/* Descriptor-table and segment management. */
	.load_tr_desc = native_load_tr_desc,
	.set_ldt = native_set_ldt,
	.load_gdt = native_load_gdt,
	.load_idt = native_load_idt,
	.store_idt = native_store_idt,
	.store_tr = native_store_tr,
	.load_tls = native_load_tls,
#ifdef CONFIG_X86_64
	.load_gs_index = native_load_gs_index,
#endif
	.write_ldt_entry = native_write_ldt_entry,
	.write_gdt_entry = native_write_gdt_entry,
	.write_idt_entry = native_write_idt_entry,

	.alloc_ldt = paravirt_nop,
	.free_ldt = paravirt_nop,

	.load_sp0 = native_load_sp0,

	/* Kernel exit paths; patched as jmps by paravirt_patch_default(). */
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
	.irq_enable_sysexit = native_irq_enable_sysexit,
#endif
#ifdef CONFIG_X86_64
#ifdef CONFIG_IA32_EMULATION
	.usergs_sysret32 = native_usergs_sysret32,
#endif
	.usergs_sysret64 = native_usergs_sysret64,
#endif
	.iret = native_iret,
	.swapgs = native_swapgs,

	.set_iopl_mask = native_set_iopl_mask,
	.io_delay = native_io_delay,

	/* Lazy-mode context-switch hooks; nops on bare hardware. */
	.start_context_switch = paravirt_nop,
	.end_context_switch = paravirt_nop,
};
403
/* APIC ops: nothing to hook for native SMP startup. */
struct pv_apic_ops pv_apic_ops = {
#ifdef CONFIG_X86_LOCAL_APIC
	.startup_ipi_hook = paravirt_nop,
#endif
};
409
#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
/* Non-PAE 32-bit: pagetable entries are 32 bits wide. */
#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_32)
#else
/* PAE or 64-bit: pagetable entries are 64 bits wide. */
#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_64)
#endif
417
/* Native MMU ops.  Value conversions (pte_val/make_pte etc.) are plain
 * identity on bare hardware (PTE_IDENT); pagetable-page lifecycle hooks
 * and lazy-mode hooks are nops. */
struct pv_mmu_ops pv_mmu_ops = {

	.read_cr2 = native_read_cr2,
	.write_cr2 = native_write_cr2,
	.read_cr3 = native_read_cr3,
	.write_cr3 = native_write_cr3,

	.flush_tlb_user = native_flush_tlb,
	.flush_tlb_kernel = native_flush_tlb_global,
	.flush_tlb_single = native_flush_tlb_single,
	.flush_tlb_others = native_flush_tlb_others,

	.pgd_alloc = __paravirt_pgd_alloc,
	.pgd_free = paravirt_nop,

	/* Pagetable-page allocation/release notifications: unused natively. */
	.alloc_pte = paravirt_nop,
	.alloc_pmd = paravirt_nop,
	.alloc_pud = paravirt_nop,
	.release_pte = paravirt_nop,
	.release_pmd = paravirt_nop,
	.release_pud = paravirt_nop,

	.set_pte = native_set_pte,
	.set_pte_at = native_set_pte_at,
	.set_pmd = native_set_pmd,
	.set_pmd_at = native_set_pmd_at,
	.pte_update = paravirt_nop,

	.ptep_modify_prot_start = __ptep_modify_prot_start,
	.ptep_modify_prot_commit = __ptep_modify_prot_commit,

#if PAGETABLE_LEVELS >= 3
#ifdef CONFIG_X86_PAE
	.set_pte_atomic = native_set_pte_atomic,
	.pte_clear = native_pte_clear,
	.pmd_clear = native_pmd_clear,
#endif
	.set_pud = native_set_pud,

	.pmd_val = PTE_IDENT,
	.make_pmd = PTE_IDENT,

#if PAGETABLE_LEVELS == 4
	.pud_val = PTE_IDENT,
	.make_pud = PTE_IDENT,

	.set_pgd = native_set_pgd,
#endif
#endif

	.pte_val = PTE_IDENT,
	.pgd_val = PTE_IDENT,

	.make_pte = PTE_IDENT,
	.make_pgd = PTE_IDENT,

	.dup_mmap = paravirt_nop,
	.exit_mmap = paravirt_nop,
	.activate_mm = paravirt_nop,

	/* Lazy batching hooks; see paravirt_{enter,leave,flush}_lazy_mmu. */
	.lazy_mode = {
		.enter = paravirt_nop,
		.leave = paravirt_nop,
		.flush = paravirt_nop,
	},

	.set_fixmap = native_set_fixmap,
};
486
/* cpu/mmu/irq ops are exported to all modules; the rest are GPL-only. */
EXPORT_SYMBOL_GPL(pv_time_ops);
EXPORT_SYMBOL (pv_cpu_ops);
EXPORT_SYMBOL (pv_mmu_ops);
EXPORT_SYMBOL_GPL(pv_apic_ops);
EXPORT_SYMBOL_GPL(pv_info);
EXPORT_SYMBOL (pv_irq_ops);
493