linux/arch/x86/kernel/paravirt.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*  Paravirtualization interfaces
    Copyright (C) 2006 Rusty Russell IBM Corporation

    2007 - x86_64 support added by Glauber de Oliveira Costa, Red Hat Inc
*/

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/efi.h>
#include <linux/bcd.h>
#include <linux/highmem.h>
#include <linux/kprobes.h>
#include <linux/pgtable.h>
#include <linux/static_call.h>

#include <asm/bug.h>
#include <asm/paravirt.h>
#include <asm/debugreg.h>
#include <asm/desc.h>
#include <asm/setup.h>
#include <asm/time.h>
#include <asm/pgalloc.h>
#include <asm/irq.h>
#include <asm/delay.h>
#include <asm/fixmap.h>
#include <asm/apic.h>
#include <asm/tlbflush.h>
#include <asm/timer.h>
#include <asm/special_insns.h>
#include <asm/tlb.h>
#include <asm/io_bitmap.h>

/*
 * nop stub, which must not clobber anything *including the stack* to
 * avoid confusing the entry prologues.
 */
extern void _paravirt_nop(void);
asm (".pushsection .entry.text, \"ax\"\n"
     ".global _paravirt_nop\n"
     "_paravirt_nop:\n\t"
     "ret\n\t"
     ".size _paravirt_nop, . - _paravirt_nop\n\t"
     ".type _paravirt_nop, @function\n\t"
     ".popsection");
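
/*
 * Note: _paravirt_nop is deliberately written in assembly.  An empty C
 * function would leave the compiler free to emit frame-setup code,
 * which would break the "clobber nothing, including the stack"
 * requirement above.
 */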

void __init default_banner(void)
{
        printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
               pv_info.name);
}

/*
 * Trap for missing ops pointers: BUG() compiles to an undefined
 * instruction (ud2), so any call patched to this stub dies loudly.
 */
static void paravirt_BUG(void)
{
        BUG();
}

struct branch {
        unsigned char opcode;   /* 0xe8: CALL rel32 */
        u32 delta;              /* displacement, relative to the next insn */
} __attribute__((packed));

static unsigned paravirt_patch_call(void *insn_buff, const void *target,
                                    unsigned long addr, unsigned len)
{
        const int call_len = 5;
        struct branch *b = insn_buff;
        /* The displacement is computed from the end of the CALL insn. */
        unsigned long delta = (unsigned long)target - (addr + call_len);

        if (len < call_len) {
                pr_warn("paravirt: Failed to patch indirect CALL at %ps\n", (void *)addr);
                /* Kernel might not be viable if patching fails, bail out: */
                BUG_ON(1);
        }

        b->opcode = 0xe8; /* call */
        b->delta = delta;
        BUILD_BUG_ON(sizeof(*b) != call_len);

        return call_len;
}
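
/*
 * Worked example: patching a call site at 0x1000 to a target at 0x2000
 * stores delta = 0x2000 - 0x1005 = 0xffb, emitting the five bytes
 * "e8 fb 0f 00 00" (CALL rel32, little-endian displacement).
 */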

#ifdef CONFIG_PARAVIRT_XXL
/* identity function, which can be inlined */
u64 notrace _paravirt_ident_64(u64 x)
{
        return x;
}
#endif

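/*
 * The key starts out true; if early boot finds no hypervisor
 * (X86_FEATURE_HYPERVISOR clear), native_pv_lock_init() disables it so
 * virt_spin_lock() stays out of the native queued-spinlock path.
 */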
DEFINE_STATIC_KEY_TRUE(virt_spin_lock_key);

void __init native_pv_lock_init(void)
{
        if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
                static_branch_disable(&virt_spin_lock_key);
}

unsigned int paravirt_patch(u8 type, void *insn_buff, unsigned long addr,
                            unsigned int len)
{
        /*
         * Neat trick to map patch type back to the call within the
         * corresponding structure: pv_ops is a structure of function
         * pointers, so 'type' can index it like an array of void *.
         */
        void *opfunc = *((void **)&pv_ops + type);
        unsigned int ret;

        if (opfunc == NULL)
                /* If there's no function, patch it with paravirt_BUG() */
                ret = paravirt_patch_call(insn_buff, paravirt_BUG, addr, len);
        else if (opfunc == _paravirt_nop)
                /* Nothing to emit: the call to _paravirt_nop stays as-is. */
                ret = 0;
        else
                /* Otherwise call the function. */
                ret = paravirt_patch_call(insn_buff, opfunc, addr, len);

        return ret;
}

struct static_key paravirt_steal_enabled;
struct static_key paravirt_steal_rq_enabled;

static u64 native_steal_clock(int cpu)
{
        /* Bare metal never steals time from a CPU. */
        return 0;
}

DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
DEFINE_STATIC_CALL(pv_sched_clock, native_sched_clock);

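/*
 * Hypervisor guest setup (kvmclock, for example) calls this during
 * early boot to reroute sched_clock() through the hypervisor's clock.
 */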
void paravirt_set_sched_clock(u64 (*func)(void))
{
        static_call_update(pv_sched_clock, func);
}

/* This is in entry.S */
extern void native_iret(void);

static struct resource reserve_ioports = {
        .start = 0,
        .end = IO_SPACE_LIMIT,
        .name = "paravirt-ioport",
        .flags = IORESOURCE_IO | IORESOURCE_BUSY,
};

/*
 * Reserve the whole legacy IO space to prevent any legacy drivers
 * from wasting time probing for their hardware.  This is a fairly
 * brute-force approach to disabling all non-virtual drivers.
 *
 * Note that this must be called very early to have any effect.
 */
int paravirt_disable_iospace(void)
{
        return request_resource(&ioport_resource, &reserve_ioports);
}

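/*
 * Lazy mode support: while a CPU is in lazy mode, a hypervisor backend
 * may batch expensive MMU or context-switch updates and commit them in
 * a single hypercall on leave.  The mode is per-CPU and must not nest.
 */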
static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LAZY_NONE;

static inline void enter_lazy(enum paravirt_lazy_mode mode)
{
        BUG_ON(this_cpu_read(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE);

        this_cpu_write(paravirt_lazy_mode, mode);
}

static void leave_lazy(enum paravirt_lazy_mode mode)
{
        BUG_ON(this_cpu_read(paravirt_lazy_mode) != mode);

        this_cpu_write(paravirt_lazy_mode, PARAVIRT_LAZY_NONE);
}

void paravirt_enter_lazy_mmu(void)
{
        enter_lazy(PARAVIRT_LAZY_MMU);
}

void paravirt_leave_lazy_mmu(void)
{
        leave_lazy(PARAVIRT_LAZY_MMU);
}

void paravirt_flush_lazy_mmu(void)
{
        preempt_disable();

        if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
                /* Leaving the mode commits any batched updates; then re-enter. */
                arch_leave_lazy_mmu_mode();
                arch_enter_lazy_mmu_mode();
        }

        preempt_enable();
}

#ifdef CONFIG_PARAVIRT_XXL
void paravirt_start_context_switch(struct task_struct *prev)
{
        BUG_ON(preemptible());

        if (this_cpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) {
                arch_leave_lazy_mmu_mode();
                /* Resume lazy MMU mode when 'prev' is switched back in. */
                set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES);
        }
        enter_lazy(PARAVIRT_LAZY_CPU);
}

void paravirt_end_context_switch(struct task_struct *next)
{
        BUG_ON(preemptible());

        leave_lazy(PARAVIRT_LAZY_CPU);

        if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES))
                arch_enter_lazy_mmu_mode();
}
#endif

enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
{
        /* Interrupt context is never lazy. */
        if (in_interrupt())
                return PARAVIRT_LAZY_NONE;

        return this_cpu_read(paravirt_lazy_mode);
}

struct pv_info pv_info = {
        .name = "bare hardware",
#ifdef CONFIG_PARAVIRT_XXL
        .extra_user_64bit_cs = __USER_CS,
#endif
};

/*
 * 64-bit pagetable entries: on bare hardware, converting between a
 * pte/pgd value and its in-memory representation is the identity
 * function.
 */
#define PTE_IDENT       __PV_IS_CALLEE_SAVE(_paravirt_ident_64)

struct paravirt_patch_template pv_ops = {
        /* Cpu ops. */
        .cpu.io_delay           = native_io_delay,

#ifdef CONFIG_PARAVIRT_XXL
        .cpu.cpuid              = native_cpuid,
        .cpu.get_debugreg       = native_get_debugreg,
        .cpu.set_debugreg       = native_set_debugreg,
        .cpu.read_cr0           = native_read_cr0,
        .cpu.write_cr0          = native_write_cr0,
        .cpu.write_cr4          = native_write_cr4,
        .cpu.wbinvd             = native_wbinvd,
        .cpu.read_msr           = native_read_msr,
        .cpu.write_msr          = native_write_msr,
        .cpu.read_msr_safe      = native_read_msr_safe,
        .cpu.write_msr_safe     = native_write_msr_safe,
        .cpu.read_pmc           = native_read_pmc,
        .cpu.load_tr_desc       = native_load_tr_desc,
        .cpu.set_ldt            = native_set_ldt,
        .cpu.load_gdt           = native_load_gdt,
        .cpu.load_idt           = native_load_idt,
        .cpu.store_tr           = native_store_tr,
        .cpu.load_tls           = native_load_tls,
        .cpu.load_gs_index      = native_load_gs_index,
        .cpu.write_ldt_entry    = native_write_ldt_entry,
        .cpu.write_gdt_entry    = native_write_gdt_entry,
        .cpu.write_idt_entry    = native_write_idt_entry,

        .cpu.alloc_ldt          = paravirt_nop,
        .cpu.free_ldt           = paravirt_nop,

        .cpu.load_sp0           = native_load_sp0,

#ifdef CONFIG_X86_IOPL_IOPERM
        .cpu.invalidate_io_bitmap       = native_tss_invalidate_io_bitmap,
        .cpu.update_io_bitmap           = native_tss_update_io_bitmap,
#endif

        .cpu.start_context_switch       = paravirt_nop,
        .cpu.end_context_switch         = paravirt_nop,

        /* Irq ops. */
        .irq.save_fl            = __PV_IS_CALLEE_SAVE(native_save_fl),
        .irq.irq_disable        = __PV_IS_CALLEE_SAVE(native_irq_disable),
        .irq.irq_enable         = __PV_IS_CALLEE_SAVE(native_irq_enable),
        .irq.safe_halt          = native_safe_halt,
        .irq.halt               = native_halt,
#endif /* CONFIG_PARAVIRT_XXL */

        /* Mmu ops. */
        .mmu.flush_tlb_user     = native_flush_tlb_local,
        .mmu.flush_tlb_kernel   = native_flush_tlb_global,
        .mmu.flush_tlb_one_user = native_flush_tlb_one_user,
        .mmu.flush_tlb_multi    = native_flush_tlb_multi,
        /*
         * Native TLB flushes are done with IPIs, which already
         * serialize against concurrent page-table walkers, so freeing
         * a page-table page needs no RCU-style deferral here.
         */
        .mmu.tlb_remove_table   =
                        (void (*)(struct mmu_gather *, void *))tlb_remove_page,

        .mmu.exit_mmap          = paravirt_nop,

#ifdef CONFIG_PARAVIRT_XXL
        .mmu.read_cr2           = __PV_IS_CALLEE_SAVE(native_read_cr2),
        .mmu.write_cr2          = native_write_cr2,
        .mmu.read_cr3           = __native_read_cr3,
        .mmu.write_cr3          = native_write_cr3,

        .mmu.pgd_alloc          = __paravirt_pgd_alloc,
        .mmu.pgd_free           = paravirt_nop,

        .mmu.alloc_pte          = paravirt_nop,
        .mmu.alloc_pmd          = paravirt_nop,
        .mmu.alloc_pud          = paravirt_nop,
        .mmu.alloc_p4d          = paravirt_nop,
        .mmu.release_pte        = paravirt_nop,
        .mmu.release_pmd        = paravirt_nop,
        .mmu.release_pud        = paravirt_nop,
        .mmu.release_p4d        = paravirt_nop,

        .mmu.set_pte            = native_set_pte,
        .mmu.set_pmd            = native_set_pmd,

        .mmu.ptep_modify_prot_start     = __ptep_modify_prot_start,
        .mmu.ptep_modify_prot_commit    = __ptep_modify_prot_commit,

        .mmu.set_pud            = native_set_pud,

        .mmu.pmd_val            = PTE_IDENT,
        .mmu.make_pmd           = PTE_IDENT,

        .mmu.pud_val            = PTE_IDENT,
        .mmu.make_pud           = PTE_IDENT,

        .mmu.set_p4d            = native_set_p4d,

#if CONFIG_PGTABLE_LEVELS >= 5
        .mmu.p4d_val            = PTE_IDENT,
        .mmu.make_p4d           = PTE_IDENT,

        .mmu.set_pgd            = native_set_pgd,
#endif /* CONFIG_PGTABLE_LEVELS >= 5 */

        .mmu.pte_val            = PTE_IDENT,
        .mmu.pgd_val            = PTE_IDENT,

        .mmu.make_pte           = PTE_IDENT,
        .mmu.make_pgd           = PTE_IDENT,

        .mmu.dup_mmap           = paravirt_nop,
        .mmu.activate_mm        = paravirt_nop,

        .mmu.lazy_mode = {
                .enter          = paravirt_nop,
                .leave          = paravirt_nop,
                .flush          = paravirt_nop,
        },

        .mmu.set_fixmap         = native_set_fixmap,
#endif /* CONFIG_PARAVIRT_XXL */

#if defined(CONFIG_PARAVIRT_SPINLOCKS)
        /* Lock ops. */
#ifdef CONFIG_SMP
        .lock.queued_spin_lock_slowpath = native_queued_spin_lock_slowpath,
        .lock.queued_spin_unlock        =
                                PV_CALLEE_SAVE(__native_queued_spin_unlock),
        .lock.wait                      = paravirt_nop,
        .lock.kick                      = paravirt_nop,
        .lock.vcpu_is_preempted         =
                                PV_CALLEE_SAVE(__native_vcpu_is_preempted),
#endif /* SMP */
#endif
};
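
/*
 * Hypervisor guests (Xen PV and KVM, for example) override individual
 * pv_ops slots during early boot; alternatives patching later turns
 * each call site into a direct call to the final op.
 */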

#ifdef CONFIG_PARAVIRT_XXL
/* At this point, native_get/set_debugreg has real function entries */
NOKPROBE_SYMBOL(native_get_debugreg);
NOKPROBE_SYMBOL(native_set_debugreg);
NOKPROBE_SYMBOL(native_load_idt);

void (*paravirt_iret)(void) = native_iret;
#endif

EXPORT_SYMBOL(pv_ops);
EXPORT_SYMBOL_GPL(pv_info);