linux/arch/x86/kernel/alternative.c
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/kprobes.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <asm/alternative.h>
#include <asm/sections.h>
#include <asm/pgtable.h>
#include <asm/mce.h>
#include <asm/nmi.h>
#include <asm/vsyscall.h>

#define MAX_PATCH_LEN (255-1)

#ifdef CONFIG_HOTPLUG_CPU
static int smp_alt_once;

static int __init bootonly(char *str)
{
        smp_alt_once = 1;
        return 1;
}
__setup("smp-alt-boot", bootonly);
#else
#define smp_alt_once 1
#endif

static int debug_alternative;

static int __init debug_alt(char *str)
{
        debug_alternative = 1;
        return 1;
}
__setup("debug-alternative", debug_alt);

static int noreplace_smp;

static int __init setup_noreplace_smp(char *str)
{
        noreplace_smp = 1;
        return 1;
}
__setup("noreplace-smp", setup_noreplace_smp);

#ifdef CONFIG_PARAVIRT
static int noreplace_paravirt = 0;

static int __init setup_noreplace_paravirt(char *str)
{
        noreplace_paravirt = 1;
        return 1;
}
__setup("noreplace-paravirt", setup_noreplace_paravirt);
#endif

#define DPRINTK(fmt, args...) if (debug_alternative) \
        printk(KERN_DEBUG fmt, args)

#ifdef GENERIC_NOP1
/* Use inline assembly to define this because the nops are defined
   as inline assembly strings in the include files and we cannot
   get them easily into strings. */
asm("\t.section .rodata, \"a\"\nintelnops: "
        GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
        GENERIC_NOP7 GENERIC_NOP8);
extern const unsigned char intelnops[];
static const unsigned char *const intel_nops[ASM_NOP_MAX+1] = {
        NULL,
        intelnops,
        intelnops + 1,
        intelnops + 1 + 2,
        intelnops + 1 + 2 + 3,
        intelnops + 1 + 2 + 3 + 4,
        intelnops + 1 + 2 + 3 + 4 + 5,
        intelnops + 1 + 2 + 3 + 4 + 5 + 6,
        intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
#endif

#ifdef K8_NOP1
asm("\t.section .rodata, \"a\"\nk8nops: "
        K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
        K8_NOP7 K8_NOP8);
extern const unsigned char k8nops[];
static const unsigned char *const k8_nops[ASM_NOP_MAX+1] = {
        NULL,
        k8nops,
        k8nops + 1,
        k8nops + 1 + 2,
        k8nops + 1 + 2 + 3,
        k8nops + 1 + 2 + 3 + 4,
        k8nops + 1 + 2 + 3 + 4 + 5,
        k8nops + 1 + 2 + 3 + 4 + 5 + 6,
        k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
#endif

#ifdef K7_NOP1
asm("\t.section .rodata, \"a\"\nk7nops: "
        K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
        K7_NOP7 K7_NOP8);
extern const unsigned char k7nops[];
static const unsigned char *const k7_nops[ASM_NOP_MAX+1] = {
        NULL,
        k7nops,
        k7nops + 1,
        k7nops + 1 + 2,
        k7nops + 1 + 2 + 3,
        k7nops + 1 + 2 + 3 + 4,
        k7nops + 1 + 2 + 3 + 4 + 5,
        k7nops + 1 + 2 + 3 + 4 + 5 + 6,
        k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
#endif

#ifdef P6_NOP1
asm("\t.section .rodata, \"a\"\np6nops: "
        P6_NOP1 P6_NOP2 P6_NOP3 P6_NOP4 P6_NOP5 P6_NOP6
        P6_NOP7 P6_NOP8);
extern const unsigned char p6nops[];
static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = {
        NULL,
        p6nops,
        p6nops + 1,
        p6nops + 1 + 2,
        p6nops + 1 + 2 + 3,
        p6nops + 1 + 2 + 3 + 4,
        p6nops + 1 + 2 + 3 + 4 + 5,
        p6nops + 1 + 2 + 3 + 4 + 5 + 6,
        p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
#endif
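
/*
 * Note (added for clarity): each table above is indexed by length, so
 * table[n] points at a nop sequence that is exactly n bytes long
 * (1 <= n <= ASM_NOP_MAX); index 0 is unused.  add_nops() below picks
 * the largest sequence that still fits the remaining hole.
 */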

#ifdef CONFIG_X86_64

extern char __vsyscall_0;
static inline const unsigned char *const *find_nop_table(void)
{
        return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
               boot_cpu_data.x86 < 6 ? k8_nops : p6_nops;
}

#else /* CONFIG_X86_64 */

static const struct nop {
        int cpuid;
        const unsigned char *const *noptable;
} noptypes[] = {
        { X86_FEATURE_K8, k8_nops },
        { X86_FEATURE_K7, k7_nops },
        { X86_FEATURE_P4, p6_nops },
        { X86_FEATURE_P3, p6_nops },
        { -1, NULL }
};

static const unsigned char *const *find_nop_table(void)
{
        const unsigned char *const *noptable = intel_nops;
        int i;

        for (i = 0; noptypes[i].cpuid >= 0; i++) {
                if (boot_cpu_has(noptypes[i].cpuid)) {
                        noptable = noptypes[i].noptable;
                        break;
                }
        }
        return noptable;
}

#endif /* CONFIG_X86_64 */

/* Use this to add nops to a buffer, then text_poke the whole buffer. */
static void add_nops(void *insns, unsigned int len)
{
        const unsigned char *const *noptable = find_nop_table();

        while (len > 0) {
                unsigned int noplen = len;
                if (noplen > ASM_NOP_MAX)
                        noplen = ASM_NOP_MAX;
                memcpy(insns, noptable[noplen], noplen);
                insns += noplen;
                len -= noplen;
        }
}
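
/*
 * Usage sketch (illustrative only, not code from this file): callers build
 * a patch in a local buffer, pad the tail with the optimal nops for this
 * CPU, and then poke the whole thing in one go.  "site", "repl" and the
 * lengths below are hypothetical:
 *
 *	char buf[5];
 *	memcpy(buf, repl, 2);		// 2-byte replacement instruction
 *	add_nops(buf + 2, 5 - 2);	// pad the remaining 3 bytes with nops
 *	text_poke(site, buf, 5);	// overwrite the 5-byte original
 *
 * apply_alternatives() below follows exactly this pattern.
 */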

extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern u8 *__smp_locks[], *__smp_locks_end[];

/* Replace instructions with better alternatives for this CPU type.
   This runs before SMP is initialized to avoid SMP problems with
   self-modifying code. This implies that asymmetric systems where
   APs have fewer capabilities than the boot processor are not handled.
   Tough. Make sure you disable such features by hand. */

void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
{
        struct alt_instr *a;
        char insnbuf[MAX_PATCH_LEN];

        DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end);
        for (a = start; a < end; a++) {
                u8 *instr = a->instr;
                BUG_ON(a->replacementlen > a->instrlen);
                BUG_ON(a->instrlen > sizeof(insnbuf));
                if (!boot_cpu_has(a->cpuid))
                        continue;
#ifdef CONFIG_X86_64
                /* vsyscall code is not mapped yet. resolve it manually. */
                if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) {
                        instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0));
                        DPRINTK("%s: vsyscall fixup: %p => %p\n",
                                __FUNCTION__, a->instr, instr);
                }
#endif
                memcpy(insnbuf, a->replacement, a->replacementlen);
                add_nops(insnbuf + a->replacementlen,
                         a->instrlen - a->replacementlen);
                text_poke(instr, insnbuf, a->instrlen);
        }
}
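
/*
 * Illustrative sketch (not code from this file): alt_instr entries are
 * normally emitted by the alternative*() macros in <asm/alternative.h>,
 * which record the original instruction, its replacement and the CPU
 * feature bit in the .altinstructions section.  A classic user is the
 * 32-bit memory barrier, roughly:
 *
 *	alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2);
 *
 * At boot, apply_alternatives() above rewrites the slow fallback with
 * "mfence" on CPUs that advertise SSE2, padding any leftover bytes with
 * nops via add_nops().
 */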

#ifdef CONFIG_SMP

static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end)
{
        u8 **ptr;

        for (ptr = start; ptr < end; ptr++) {
                if (*ptr < text)
                        continue;
                if (*ptr > text_end)
                        continue;
                text_poke(*ptr, ((unsigned char []){0xf0}), 1); /* add lock prefix */
        }
}

static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end)
{
        u8 **ptr;
        char insn[1];

        if (noreplace_smp)
                return;

        add_nops(insn, 1);
        for (ptr = start; ptr < end; ptr++) {
                if (*ptr < text)
                        continue;
                if (*ptr > text_end)
                        continue;
                text_poke(*ptr, insn, 1);
        }
}
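
/*
 * Background sketch (assumption: the LOCK_PREFIX macro from
 * <asm/alternative.h> is what populates __smp_locks[]): each locked
 * instruction in the kernel is emitted along the lines of
 *
 *	.section .smp_locks,"a"
 *	.balign 4
 *	.long 661f		# address of the prefix byte
 *	.previous
 *   661:	lock; incl (%reg)
 *
 * so .smp_locks ends up as an array of pointers to 0xf0 lock-prefix
 * bytes.  The two helpers above flip those single bytes between a lock
 * prefix (SMP) and a one-byte nop (UP).
 */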

struct smp_alt_module {
        /* owning module (NULL for the core kernel) */
        struct module   *mod;
        char            *name;

        /* ptrs to lock prefixes */
        u8              **locks;
        u8              **locks_end;

        /* .text segment, needed to avoid patching init code ;) */
        u8              *text;
        u8              *text_end;

        struct list_head next;
};
static LIST_HEAD(smp_alt_modules);
static DEFINE_SPINLOCK(smp_alt);

void alternatives_smp_module_add(struct module *mod, char *name,
                                 void *locks, void *locks_end,
                                 void *text,  void *text_end)
{
        struct smp_alt_module *smp;
        unsigned long flags;

        if (noreplace_smp)
                return;

        if (smp_alt_once) {
                if (boot_cpu_has(X86_FEATURE_UP))
                        alternatives_smp_unlock(locks, locks_end,
                                                text, text_end);
                return;
        }

        smp = kzalloc(sizeof(*smp), GFP_KERNEL);
        if (NULL == smp)
                return; /* we'll run the (safe but slow) SMP code then ... */

        smp->mod        = mod;
        smp->name       = name;
        smp->locks      = locks;
        smp->locks_end  = locks_end;
        smp->text       = text;
        smp->text_end   = text_end;
        DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n",
                __FUNCTION__, smp->locks, smp->locks_end,
                smp->text, smp->text_end, smp->name);

        spin_lock_irqsave(&smp_alt, flags);
        list_add_tail(&smp->next, &smp_alt_modules);
        if (boot_cpu_has(X86_FEATURE_UP))
                alternatives_smp_unlock(smp->locks, smp->locks_end,
                                        smp->text, smp->text_end);
        spin_unlock_irqrestore(&smp_alt, flags);
}

void alternatives_smp_module_del(struct module *mod)
{
        struct smp_alt_module *item;
        unsigned long flags;

        if (smp_alt_once || noreplace_smp)
                return;

        spin_lock_irqsave(&smp_alt, flags);
        list_for_each_entry(item, &smp_alt_modules, next) {
                if (mod != item->mod)
                        continue;
                list_del(&item->next);
                spin_unlock_irqrestore(&smp_alt, flags);
                DPRINTK("%s: %s\n", __FUNCTION__, item->name);
                kfree(item);
                return;
        }
        spin_unlock_irqrestore(&smp_alt, flags);
}

void alternatives_smp_switch(int smp)
{
        struct smp_alt_module *mod;
        unsigned long flags;

#ifdef CONFIG_LOCKDEP
        /*
         * An as-yet-unfixed binutils section handling bug prevents
         * alternatives replacement from working reliably, so turn
         * it off:
         */
        printk("lockdep: not fixing up alternatives.\n");
        return;
#endif

        if (noreplace_smp || smp_alt_once)
                return;
        BUG_ON(!smp && (num_online_cpus() > 1));

        spin_lock_irqsave(&smp_alt, flags);
        if (smp) {
                printk(KERN_INFO "SMP alternatives: switching to SMP code\n");
                clear_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
                clear_bit(X86_FEATURE_UP, cpu_data(0).x86_capability);
                list_for_each_entry(mod, &smp_alt_modules, next)
                        alternatives_smp_lock(mod->locks, mod->locks_end,
                                              mod->text, mod->text_end);
        } else {
                printk(KERN_INFO "SMP alternatives: switching to UP code\n");
                set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
                set_bit(X86_FEATURE_UP, cpu_data(0).x86_capability);
                list_for_each_entry(mod, &smp_alt_modules, next)
                        alternatives_smp_unlock(mod->locks, mod->locks_end,
                                                mod->text, mod->text_end);
        }
        spin_unlock_irqrestore(&smp_alt, flags);
}

#endif

#ifdef CONFIG_PARAVIRT
void apply_paravirt(struct paravirt_patch_site *start,
                    struct paravirt_patch_site *end)
{
        struct paravirt_patch_site *p;
        char insnbuf[MAX_PATCH_LEN];

        if (noreplace_paravirt)
                return;

        for (p = start; p < end; p++) {
                unsigned int used;

                BUG_ON(p->len > MAX_PATCH_LEN);
                /* prep the buffer with the original instructions */
                memcpy(insnbuf, p->instr, p->len);
                used = pv_init_ops.patch(p->instrtype, p->clobbers, insnbuf,
                                         (unsigned long)p->instr, p->len);

                BUG_ON(used > p->len);

                /* Pad the rest with nops */
                add_nops(insnbuf + used, p->len - used);
                text_poke(p->instr, insnbuf, p->len);
        }
}
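
/*
 * Illustrative sketch (an assumption about a typical patch site, not code
 * from this file): on native hardware the patch callback usually replaces
 * an indirect paravirt call with the equivalent native instruction, e.g.
 * for interrupt disabling:
 *
 *	before:  call *pv_irq_ops.irq_disable	(indirect paravirt call)
 *	after:   cli				(rest padded with nops)
 *
 * The nop padding comes from add_nops() above and the patched bytes are
 * written back with text_poke().
 */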
extern struct paravirt_patch_site __start_parainstructions[],
        __stop_parainstructions[];
#endif  /* CONFIG_PARAVIRT */

void __init alternative_instructions(void)
{
        unsigned long flags;

        /* The patching is not fully atomic, so try to avoid local
           interrupts that might execute the code being patched.
           Other CPUs are not running. */
        stop_nmi();
#ifdef CONFIG_X86_MCE
        stop_mce();
#endif

        local_irq_save(flags);
        apply_alternatives(__alt_instructions, __alt_instructions_end);

        /* switch to patch-once-at-boottime-only mode and free the
         * tables in case we know the number of CPUs will never ever
         * change */
#ifdef CONFIG_HOTPLUG_CPU
        if (num_possible_cpus() < 2)
                smp_alt_once = 1;
#endif

#ifdef CONFIG_SMP
        if (smp_alt_once) {
                if (1 == num_possible_cpus()) {
                        printk(KERN_INFO "SMP alternatives: switching to UP code\n");
                        set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
                        set_bit(X86_FEATURE_UP, cpu_data(0).x86_capability);
                        alternatives_smp_unlock(__smp_locks, __smp_locks_end,
                                                _text, _etext);
                }
        } else {
                alternatives_smp_module_add(NULL, "core kernel",
                                            __smp_locks, __smp_locks_end,
                                            _text, _etext);
                alternatives_smp_switch(0);
        }
#endif
        apply_paravirt(__parainstructions, __parainstructions_end);
        local_irq_restore(flags);

        if (smp_alt_once)
                free_init_pages("SMP alternatives",
                                (unsigned long)__smp_locks,
                                (unsigned long)__smp_locks_end);

        restart_nmi();
#ifdef CONFIG_X86_MCE
        restart_mce();
#endif
}

/*
 * Warning:
 * When you use this code to patch more than one byte of an instruction
 * you need to make sure that other CPUs cannot execute this code in parallel.
 * No thread may be preempted in the middle of these instructions either.
 * And on the local CPU you need to be protected against NMI or MCE handlers
 * seeing an inconsistent instruction while you patch.
 */
void __kprobes text_poke(void *addr, unsigned char *opcode, int len)
{
        memcpy(addr, opcode, len);
        sync_core();
        /* Could also do a CLFLUSH here to speed up CPU recovery; but
           that causes hangs on some VIA CPUs. */
}
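
/*
 * Usage sketch (hypothetical call site, not code from this file): the
 * simplest safe use is a single-byte patch, as alternatives_smp_unlock()
 * does above, since a one-byte store is written atomically:
 *
 *	unsigned char nop = 0x90;	// one-byte x86 nop
 *	text_poke(site, &nop, 1);	// "site" is a hypothetical address
 *
 * Multi-byte patches additionally need the protections described in the
 * warning above text_poke().
 */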