linux/arch/x86/include/asm/desc.h
#ifndef _ASM_X86_DESC_H
#define _ASM_X86_DESC_H

#include <asm/desc_defs.h>
#include <asm/ldt.h>
#include <asm/mmu.h>
#include <asm/fixmap.h>

#include <linux/smp.h>
#include <linux/percpu.h>

static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *info)
{
        desc->limit0            = info->limit & 0x0ffff;

        desc->base0             = (info->base_addr & 0x0000ffff);
        desc->base1             = (info->base_addr & 0x00ff0000) >> 16;

        desc->type              = (info->read_exec_only ^ 1) << 1;
        desc->type             |= info->contents << 2;

        desc->s                 = 1;
        desc->dpl               = 0x3;
        desc->p                 = info->seg_not_present ^ 1;
        desc->limit             = (info->limit & 0xf0000) >> 16;
        desc->avl               = info->useable;
        desc->d                 = info->seg_32bit;
        desc->g                 = info->limit_in_pages;

        desc->base2             = (info->base_addr & 0xff000000) >> 24;
        /*
         * Don't allow setting of the lm bit. It would confuse
         * user_64bit_mode and would get overridden by sysret anyway.
         */
        desc->l                 = 0;
}
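
/*
 * Illustrative sketch only: fill_ldt() is how a user_desc coming in from
 * set_thread_area()/modify_ldt() becomes a hardware descriptor.  A flat
 * 32-bit segment, for example, would be packed along these lines (DPL is
 * always forced to 3 and the lm bit to 0 by the function above):
 *
 *	struct user_desc info = {
 *		.base_addr	= 0,
 *		.limit		= 0xfffff,
 *		.seg_32bit	= 1,
 *		.limit_in_pages	= 1,
 *	};
 *	struct desc_struct desc;
 *	fill_ldt(&desc, &info);
 */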

extern struct desc_ptr idt_descr;
extern gate_desc idt_table[];
extern const struct desc_ptr debug_idt_descr;
extern gate_desc debug_idt_table[];

struct gdt_page {
        struct desc_struct gdt[GDT_ENTRIES];
} __attribute__((aligned(PAGE_SIZE)));

DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page);

/* Provide the original GDT */
static inline struct desc_struct *get_cpu_gdt_rw(unsigned int cpu)
{
        return per_cpu(gdt_page, cpu).gdt;
}

/* Provide the current original GDT */
static inline struct desc_struct *get_current_gdt_rw(void)
{
        return this_cpu_ptr(&gdt_page)->gdt;
}

/* Get the fixmap index for a specific processor */
static inline unsigned int get_cpu_gdt_ro_index(int cpu)
{
        return FIX_GDT_REMAP_BEGIN + cpu;
}

/* Provide the fixmap address of the remapped GDT */
static inline struct desc_struct *get_cpu_gdt_ro(int cpu)
{
        unsigned int idx = get_cpu_gdt_ro_index(cpu);
        return (struct desc_struct *)__fix_to_virt(idx);
}

/* Provide the current read-only GDT */
static inline struct desc_struct *get_current_gdt_ro(void)
{
        return get_cpu_gdt_ro(smp_processor_id());
}

/* Provide the physical address of the GDT page. */
static inline phys_addr_t get_cpu_gdt_paddr(unsigned int cpu)
{
        return per_cpu_ptr_to_phys(get_cpu_gdt_rw(cpu));
}
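
/*
 * Rough usage note (not normative): writes to descriptors, e.g. load_TLS(),
 * LDT/TSS setup and the busy-bit fixup in force_reload_TR(), go through the
 * _rw alias above, while the _ro fixmap alias is what load_fixmap_gdt()
 * points GDTR at during normal operation.
 */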

#ifdef CONFIG_X86_64

static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
                             unsigned dpl, unsigned ist, unsigned seg)
{
        gate->offset_low        = PTR_LOW(func);
        gate->segment           = __KERNEL_CS;
        gate->ist               = ist;
        gate->p                 = 1;
        gate->dpl               = dpl;
        gate->zero0             = 0;
        gate->zero1             = 0;
        gate->type              = type;
        gate->offset_middle     = PTR_MIDDLE(func);
        gate->offset_high       = PTR_HIGH(func);
}

#else
static inline void pack_gate(gate_desc *gate, unsigned char type,
                             unsigned long base, unsigned dpl, unsigned flags,
                             unsigned short seg)
{
        gate->a = (seg << 16) | (base & 0xffff);
        gate->b = (base & 0xffff0000) | (((0x80 | type | (dpl << 5)) & 0xff) << 8);
}

#endif

static inline int desc_empty(const void *ptr)
{
        const u32 *desc = ptr;

        return !(desc[0] | desc[1]);
}

#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#else
#define load_TR_desc()                          native_load_tr_desc()
#define load_gdt(dtr)                           native_load_gdt(dtr)
#define load_idt(dtr)                           native_load_idt(dtr)
#define load_tr(tr)                             asm volatile("ltr %0"::"m" (tr))
#define load_ldt(ldt)                           asm volatile("lldt %0"::"m" (ldt))

#define store_gdt(dtr)                          native_store_gdt(dtr)
#define store_idt(dtr)                          native_store_idt(dtr)
#define store_tr(tr)                            (tr = native_store_tr())

#define load_TLS(t, cpu)                        native_load_tls(t, cpu)
#define set_ldt                                 native_set_ldt

#define write_ldt_entry(dt, entry, desc)        native_write_ldt_entry(dt, entry, desc)
#define write_gdt_entry(dt, entry, desc, type)  native_write_gdt_entry(dt, entry, desc, type)
#define write_idt_entry(dt, entry, g)           native_write_idt_entry(dt, entry, g)

static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries)
{
}

static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries)
{
}
#endif  /* CONFIG_PARAVIRT */

#define store_ldt(ldt) asm("sldt %0" : "=m"(ldt))

static inline void native_write_idt_entry(gate_desc *idt, int entry, const gate_desc *gate)
{
        memcpy(&idt[entry], gate, sizeof(*gate));
}

static inline void native_write_ldt_entry(struct desc_struct *ldt, int entry, const void *desc)
{
        memcpy(&ldt[entry], desc, 8);
}

static inline void
native_write_gdt_entry(struct desc_struct *gdt, int entry, const void *desc, int type)
{
        unsigned int size;

        switch (type) {
        case DESC_TSS:  size = sizeof(tss_desc);        break;
        case DESC_LDT:  size = sizeof(ldt_desc);        break;
        default:        size = sizeof(*gdt);            break;
        }

        memcpy(&gdt[entry], desc, size);
}

static inline void pack_descriptor(struct desc_struct *desc, unsigned long base,
                                   unsigned long limit, unsigned char type,
                                   unsigned char flags)
{
        desc->a = ((base & 0xffff) << 16) | (limit & 0xffff);
        desc->b = (base & 0xff000000) | ((base & 0xff0000) >> 16) |
                (limit & 0x000f0000) | ((type & 0xff) << 8) |
                ((flags & 0xf) << 20);
        desc->p = 1;
}


static inline void set_tssldt_descriptor(void *d, unsigned long addr, unsigned type, unsigned size)
{
#ifdef CONFIG_X86_64
        struct ldttss_desc64 *desc = d;

        memset(desc, 0, sizeof(*desc));

        desc->limit0            = size & 0xFFFF;
        desc->base0             = PTR_LOW(addr);
        desc->base1             = PTR_MIDDLE(addr) & 0xFF;
        desc->type              = type;
        desc->p                 = 1;
        desc->limit1            = (size >> 16) & 0xF;
        desc->base2             = (PTR_MIDDLE(addr) >> 8) & 0xFF;
        desc->base3             = PTR_HIGH(addr);
#else
        pack_descriptor((struct desc_struct *)d, addr, size, 0x80 | type, 0);
#endif
}

static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr)
{
        struct desc_struct *d = get_cpu_gdt_rw(cpu);
        tss_desc tss;

        set_tssldt_descriptor(&tss, (unsigned long)addr, DESC_TSS,
                              __KERNEL_TSS_LIMIT);
        write_gdt_entry(d, entry, &tss, DESC_TSS);
}

#define set_tss_desc(cpu, addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)

static inline void native_set_ldt(const void *addr, unsigned int entries)
{
        if (likely(entries == 0))
                asm volatile("lldt %w0"::"q" (0));
        else {
                unsigned cpu = smp_processor_id();
                ldt_desc ldt;

                set_tssldt_descriptor(&ldt, (unsigned long)addr, DESC_LDT,
                                      entries * LDT_ENTRY_SIZE - 1);
                write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_LDT,
                                &ldt, DESC_LDT);
                asm volatile("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
        }
}
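
/*
 * Worked example (illustrative): with LDT_ENTRY_SIZE being 8 bytes per
 * descriptor, an LDT of 16 entries occupies 128 bytes, so the limit
 * programmed above is 16 * 8 - 1 = 127; segment limits are inclusive of
 * the last addressable byte.
 */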

static inline void native_load_gdt(const struct desc_ptr *dtr)
{
        asm volatile("lgdt %0"::"m" (*dtr));
}

static inline void native_load_idt(const struct desc_ptr *dtr)
{
        asm volatile("lidt %0"::"m" (*dtr));
}

static inline void native_store_gdt(struct desc_ptr *dtr)
{
        asm volatile("sgdt %0":"=m" (*dtr));
}

static inline void native_store_idt(struct desc_ptr *dtr)
{
        asm volatile("sidt %0":"=m" (*dtr));
}

/*
 * The LTR instruction marks the TSS GDT entry as busy. On 64-bit, the GDT is
 * a read-only remapping. To prevent a page fault, the GDT is switched to the
 * original writeable version when needed.
 */
#ifdef CONFIG_X86_64
static inline void native_load_tr_desc(void)
{
        struct desc_ptr gdt;
        int cpu = raw_smp_processor_id();
        bool restore = false;
        struct desc_struct *fixmap_gdt;

        native_store_gdt(&gdt);
        fixmap_gdt = get_cpu_gdt_ro(cpu);

        /*
         * If the current GDT is the read-only fixmap, swap to the original
         * writeable version. Swap back at the end.
         */
        if (gdt.address == (unsigned long)fixmap_gdt) {
                load_direct_gdt(cpu);
                restore = true;
        }
        asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
        if (restore)
                load_fixmap_gdt(cpu);
}
#else
static inline void native_load_tr_desc(void)
{
        asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
}
#endif

static inline unsigned long native_store_tr(void)
{
        unsigned long tr;

        asm volatile("str %0":"=r" (tr));

        return tr;
}

static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
{
        struct desc_struct *gdt = get_cpu_gdt_rw(cpu);
        unsigned int i;

        for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
                gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i];
}

DECLARE_PER_CPU(bool, __tss_limit_invalid);

static inline void force_reload_TR(void)
{
        struct desc_struct *d = get_current_gdt_rw();
        tss_desc tss;

        memcpy(&tss, &d[GDT_ENTRY_TSS], sizeof(tss_desc));

        /*
         * LTR requires an available TSS, and the TSS is currently
         * busy.  Make it available so that LTR will work.
         */
        tss.type = DESC_TSS;
        write_gdt_entry(d, GDT_ENTRY_TSS, &tss, DESC_TSS);

        load_TR_desc();
        this_cpu_write(__tss_limit_invalid, false);
}

/*
 * Call this if you need the TSS limit to be correct, which should be the case
 * if and only if you have TIF_IO_BITMAP set or you're switching to a task
 * with TIF_IO_BITMAP set.
 */
static inline void refresh_tss_limit(void)
{
        DEBUG_LOCKS_WARN_ON(preemptible());

        if (unlikely(this_cpu_read(__tss_limit_invalid)))
                force_reload_TR();
}

/*
 * If you do something evil that corrupts the cached TSS limit (I'm looking
 * at you, VMX exits), call this function.
 *
 * The optimization here is that the TSS limit only matters for Linux if the
 * IO bitmap is in use.  If the TSS limit gets forced to its minimum value,
 * everything works except that the IO bitmap will be ignored and all CPL 3
 * IO instructions will #GP, which is exactly what we want for normal tasks.
 */
static inline void invalidate_tss_limit(void)
{
        DEBUG_LOCKS_WARN_ON(preemptible());

        if (unlikely(test_thread_flag(TIF_IO_BITMAP)))
                force_reload_TR();
        else
                this_cpu_write(__tss_limit_invalid, true);
}
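
/*
 * Hedged usage sketch: a path that clobbers the hardware-cached TSS limit
 * behind the kernel's back (a VMX exit, for instance) would, with preemption
 * already disabled, simply do:
 *
 *	invalidate_tss_limit();
 *
 * The limit is then repaired lazily: a later refresh_tss_limit() call, made
 * on paths that actually need the IO bitmap, reloads TR.
 */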

/* This intentionally ignores lm, since 32-bit apps don't have that field. */
#define LDT_empty(info)                                 \
        ((info)->base_addr              == 0    &&      \
         (info)->limit                  == 0    &&      \
         (info)->contents               == 0    &&      \
         (info)->read_exec_only         == 1    &&      \
         (info)->seg_32bit              == 0    &&      \
         (info)->limit_in_pages         == 0    &&      \
         (info)->seg_not_present        == 1    &&      \
         (info)->useable                == 0)

/* Lots of programs expect an all-zero user_desc to mean "no segment at all". */
static inline bool LDT_zero(const struct user_desc *info)
{
        return (info->base_addr         == 0 &&
                info->limit             == 0 &&
                info->contents          == 0 &&
                info->read_exec_only    == 0 &&
                info->seg_32bit         == 0 &&
                info->limit_in_pages    == 0 &&
                info->seg_not_present   == 0 &&
                info->useable           == 0);
}
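
/*
 * Illustrative only: callers that install user descriptors typically treat
 * either encoding (the canonical "empty" layout above or the all-zero one)
 * as a request to clear the slot, roughly:
 *
 *	if (LDT_empty(&info) || LDT_zero(&info))
 *		memset(&desc, 0, sizeof(desc));
 *	else
 *		fill_ldt(&desc, &info);
 */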

static inline void clear_LDT(void)
{
        set_ldt(NULL, 0);
}

static inline unsigned long get_desc_base(const struct desc_struct *desc)
{
        return (unsigned)(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
}

static inline void set_desc_base(struct desc_struct *desc, unsigned long base)
{
        desc->base0 = base & 0xffff;
        desc->base1 = (base >> 16) & 0xff;
        desc->base2 = (base >> 24) & 0xff;
}

static inline unsigned long get_desc_limit(const struct desc_struct *desc)
{
        return desc->limit0 | (desc->limit << 16);
}

static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit)
{
        desc->limit0 = limit & 0xffff;
        desc->limit = (limit >> 16) & 0xf;
}

#ifdef CONFIG_X86_64
static inline void set_nmi_gate(int gate, void *addr)
{
        gate_desc s;

        pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS);
        write_idt_entry(debug_idt_table, gate, &s);
}
#endif

#ifdef CONFIG_TRACING
extern struct desc_ptr trace_idt_descr;
extern gate_desc trace_idt_table[];
static inline void write_trace_idt_entry(int entry, const gate_desc *gate)
{
        write_idt_entry(trace_idt_table, entry, gate);
}

static inline void _trace_set_gate(int gate, unsigned type, void *addr,
                                   unsigned dpl, unsigned ist, unsigned seg)
{
        gate_desc s;

        pack_gate(&s, type, (unsigned long)addr, dpl, ist, seg);
        /*
         * does not need to be atomic because it is only done once at
         * setup time
         */
        write_trace_idt_entry(gate, &s);
}
#else
static inline void write_trace_idt_entry(int entry, const gate_desc *gate)
{
}

#define _trace_set_gate(gate, type, addr, dpl, ist, seg)
#endif

static inline void _set_gate(int gate, unsigned type, void *addr,
                             unsigned dpl, unsigned ist, unsigned seg)
{
        gate_desc s;

        pack_gate(&s, type, (unsigned long)addr, dpl, ist, seg);
        /*
         * does not need to be atomic because it is only done once at
         * setup time
         */
        write_idt_entry(idt_table, gate, &s);
        write_trace_idt_entry(gate, &s);
}

/*
 * This needs to use 'idt_table' rather than 'idt', and
 * thus use the _nonmapped_ version of the IDT, as the
 * Pentium F0 0F bugfix can have resulted in the mapped
 * IDT being write-protected.
 */
#define set_intr_gate_notrace(n, addr)                                  \
        do {                                                            \
                BUG_ON((unsigned)n > 0xFF);                             \
                _set_gate(n, GATE_INTERRUPT, (void *)addr, 0, 0,        \
                          __KERNEL_CS);                                 \
        } while (0)

#define set_intr_gate(n, addr)                                          \
        do {                                                            \
                set_intr_gate_notrace(n, addr);                         \
                _trace_set_gate(n, GATE_INTERRUPT, (void *)trace_##addr,\
                                0, 0, __KERNEL_CS);                     \
        } while (0)
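
/*
 * Illustrative only: trap setup code uses these as, e.g.,
 *
 *	set_intr_gate(X86_TRAP_PF, page_fault);
 *
 * Note that set_intr_gate() token-pastes "trace_" onto the handler name for
 * the tracing IDT, so a trace_<handler> entry point must exist whenever
 * CONFIG_TRACING is enabled.
 */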

extern int first_system_vector;
/* used_vectors is a bitmap of vectors that are not managed by the percpu vector_irq */
extern unsigned long used_vectors[];

static inline void alloc_system_vector(int vector)
{
        if (!test_bit(vector, used_vectors)) {
                set_bit(vector, used_vectors);
                if (first_system_vector > vector)
                        first_system_vector = vector;
        } else {
                BUG();
        }
}

#define alloc_intr_gate(n, addr)                                \
        do {                                                    \
                alloc_system_vector(n);                         \
                set_intr_gate(n, addr);                         \
        } while (0)
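
/*
 * Illustrative only: a typical system-vector registration, e.g. from APIC
 * setup code, looks roughly like:
 *
 *	alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
 *
 * which reserves the vector in used_vectors and installs the handler in the
 * IDT (plus the trace variant when CONFIG_TRACING is enabled).
 */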

/*
 * This routine sets up an interrupt gate at descriptor privilege level 3.
 */
static inline void set_system_intr_gate(unsigned int n, void *addr)
{
        BUG_ON((unsigned)n > 0xFF);
        _set_gate(n, GATE_INTERRUPT, addr, 0x3, 0, __KERNEL_CS);
}

static inline void set_system_trap_gate(unsigned int n, void *addr)
{
        BUG_ON((unsigned)n > 0xFF);
        _set_gate(n, GATE_TRAP, addr, 0x3, 0, __KERNEL_CS);
}

static inline void set_trap_gate(unsigned int n, void *addr)
{
        BUG_ON((unsigned)n > 0xFF);
        _set_gate(n, GATE_TRAP, addr, 0, 0, __KERNEL_CS);
}

static inline void set_task_gate(unsigned int n, unsigned int gdt_entry)
{
        BUG_ON((unsigned)n > 0xFF);
        _set_gate(n, GATE_TASK, (void *)0, 0, 0, (gdt_entry<<3));
}

static inline void set_intr_gate_ist(int n, void *addr, unsigned ist)
{
        BUG_ON((unsigned)n > 0xFF);
        _set_gate(n, GATE_INTERRUPT, addr, 0, ist, __KERNEL_CS);
}

static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist)
{
        BUG_ON((unsigned)n > 0xFF);
        _set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS);
}

#ifdef CONFIG_X86_64
DECLARE_PER_CPU(u32, debug_idt_ctr);
static inline bool is_debug_idt_enabled(void)
{
        if (this_cpu_read(debug_idt_ctr))
                return true;

        return false;
}

static inline void load_debug_idt(void)
{
        load_idt((const struct desc_ptr *)&debug_idt_descr);
}
#else
static inline bool is_debug_idt_enabled(void)
{
        return false;
}

static inline void load_debug_idt(void)
{
}
#endif

#ifdef CONFIG_TRACING
extern atomic_t trace_idt_ctr;
static inline bool is_trace_idt_enabled(void)
{
        if (atomic_read(&trace_idt_ctr))
                return true;

        return false;
}

static inline void load_trace_idt(void)
{
        load_idt((const struct desc_ptr *)&trace_idt_descr);
}
#else
static inline bool is_trace_idt_enabled(void)
{
        return false;
}

static inline void load_trace_idt(void)
{
}
#endif

/*
 * load_current_idt() must be called with interrupts disabled to avoid races.
 * That way the IDT will always be set back to the expected descriptor. It's
 * also called when a CPU is being initialized, and in that case interrupts
 * don't need to be disabled, as nothing should be bothering the CPU then.
 */
static inline void load_current_idt(void)
{
        if (is_debug_idt_enabled())
                load_debug_idt();
        else if (is_trace_idt_enabled())
                load_trace_idt();
        else
                load_idt((const struct desc_ptr *)&idt_descr);
}
#endif /* _ASM_X86_DESC_H */