/*
 *      fs/proc/kcore.c kernel ELF core dumper
 *
 *      Modelled on fs/exec.c:aout_core_dump()
 *      Jeremy Fitzhardinge <jeremy@sw.oz.au>
 *      ELF version written by David Howells <David.Howells@nexor.co.uk>
 *      Modified and incorporated into 2.3.x by Tigran Aivazian <tigran@veritas.com>
 *      Support to dump vmalloc'd areas (ELF only), Tigran Aivazian <tigran@veritas.com>
 *      Safe accesses to vmalloc/direct-mapped discontiguous areas, Kanoj Sarcar <kanoj@sgi.com>
 */
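
/*
 * /proc/kcore presents kernel memory as an ELF core file, so it can be
 * inspected with ordinary core-dump tooling.  A typical (illustrative)
 * session, run as root against a debug-info build of the running kernel:
 *
 *      gdb /path/to/vmlinux /proc/kcore
 *      (gdb) print jiffies
 *
 * The vmlinux path is an example only; any image matching the running
 * kernel works.
 */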
  11
  12#include <linux/mm.h>
  13#include <linux/proc_fs.h>
  14#include <linux/user.h>
  15#include <linux/capability.h>
  16#include <linux/elf.h>
  17#include <linux/elfcore.h>
  18#include <linux/vmalloc.h>
  19#include <linux/highmem.h>
  20#include <linux/printk.h>
  21#include <linux/bootmem.h>
  22#include <linux/init.h>
  23#include <linux/slab.h>
  24#include <asm/uaccess.h>
  25#include <asm/io.h>
  26#include <linux/list.h>
  27#include <linux/ioport.h>
  28#include <linux/memory.h>
  29#include <asm/sections.h>
  30
  31#define CORE_STR "CORE"
  32
  33#ifndef ELF_CORE_EFLAGS
  34#define ELF_CORE_EFLAGS 0
  35#endif
  36
  37static struct proc_dir_entry *proc_root_kcore;
  38
  39
  40#ifndef kc_vaddr_to_offset
  41#define kc_vaddr_to_offset(v) ((v) - PAGE_OFFSET)
  42#endif
  43#ifndef kc_offset_to_vaddr
  44#define kc_offset_to_vaddr(o) ((o) + PAGE_OFFSET)
  45#endif
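
/*
 * With the default macros above, the memory image of a kernel virtual
 * address v lives at file offset elf_buflen + (v - PAGE_OFFSET), where
 * elf_buflen is the page-aligned size of the ELF header area computed
 * in get_kcore_size().  Architectures with a non-linear layout override
 * these macros.
 */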
  46
  47/* An ELF note in memory */
  48struct memelfnote
  49{
  50        const char *name;
  51        int type;
  52        unsigned int datasz;
  53        void *data;
  54};
  55
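/*
 * All registered memory ranges, protected by kclist_lock.
 * kcore_need_update is set at boot and on memory hotplug; the next
 * open_kcore() then rebuilds the KCORE_RAM entries.
 */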
static LIST_HEAD(kclist_head);
static DEFINE_RWLOCK(kclist_lock);
static int kcore_need_update = 1;

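/*
 * Register a range of kernel memory for inclusion in the dump.  Each
 * entry becomes one PT_LOAD program header.  The caller owns @new and
 * must keep it alive while it is on the list.
 */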
void
kclist_add(struct kcore_list *new, void *addr, size_t size, int type)
{
        new->addr = (unsigned long)addr;
        new->size = size;
        new->type = type;

        write_lock(&kclist_lock);
        list_add_tail(&new->list, &kclist_head);
        write_unlock(&kclist_lock);
}

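/*
 * Compute the total size of the dump: the page-aligned ELF header area
 * (returned in *elf_buflen) plus the highest end offset of any
 * registered range.  *nphdr counts one PT_NOTE plus one PT_LOAD per
 * list entry.  Must be called with kclist_lock held.
 */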
static size_t get_kcore_size(int *nphdr, size_t *elf_buflen)
{
        size_t try, size;
        struct kcore_list *m;

        *nphdr = 1; /* PT_NOTE */
        size = 0;

        list_for_each_entry(m, &kclist_head, list) {
                try = kc_vaddr_to_offset((size_t)m->addr + m->size);
                if (try > size)
                        size = try;
                *nphdr = *nphdr + 1;
        }
        *elf_buflen = sizeof(struct elfhdr) +
                      (*nphdr + 2) * sizeof(struct elf_phdr) +
                      3 * (sizeof(struct elf_note) +
                           roundup(sizeof(CORE_STR), 4)) +
                      roundup(sizeof(struct elf_prstatus), 4) +
                      roundup(sizeof(struct elf_prpsinfo), 4) +
                      roundup(sizeof(struct task_struct), 4);
        *elf_buflen = PAGE_ALIGN(*elf_buflen);
        return size + *elf_buflen;
}

static void free_kclist_ents(struct list_head *head)
{
        struct kcore_list *tmp, *pos;

        list_for_each_entry_safe(pos, tmp, head, list) {
                list_del(&pos->list);
                kfree(pos);
        }
}

/*
 * Replace all KCORE_RAM/KCORE_VMEMMAP entries with the passed-in list.
 */
static void __kcore_update_ram(struct list_head *list)
{
        int nphdr;
        size_t size;
        struct kcore_list *tmp, *pos;
        LIST_HEAD(garbage);

        write_lock(&kclist_lock);
        if (kcore_need_update) {
                list_for_each_entry_safe(pos, tmp, &kclist_head, list) {
                        if (pos->type == KCORE_RAM
                                || pos->type == KCORE_VMEMMAP)
                                list_move(&pos->list, &garbage);
                }
                list_splice_tail(list, &kclist_head);
        } else
                list_splice(list, &garbage);
        kcore_need_update = 0;
        proc_root_kcore->size = get_kcore_size(&nphdr, &size);
        write_unlock(&kclist_lock);

        free_kclist_ents(&garbage);
}

#ifdef CONFIG_HIGHMEM
/*
 * With CONFIG_HIGHMEM we treat [0...max_low_pfn) as one contiguous
 * low-memory range: any holes there are small compared with the
 * !HIGHMEM case, and highmem itself is invisible to the kernel's
 * direct mapping, so only low memory is registered here.
 */
static int kcore_update_ram(void)
{
        LIST_HEAD(head);
        struct kcore_list *ent;
        int ret = 0;

        ent = kmalloc(sizeof(*ent), GFP_KERNEL);
        if (!ent)
                return -ENOMEM;
        ent->addr = (unsigned long)__va(0);
        ent->size = max_low_pfn << PAGE_SHIFT;
        ent->type = KCORE_RAM;
        list_add(&ent->list, &head);
        __kcore_update_ram(&head);
        return ret;
}

#else /* !CONFIG_HIGHMEM */

#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
 * Calculate the vmemmap address range backing the given System RAM
 * entry and register it as a KCORE_VMEMMAP entry.
 */
static int
get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
{
        unsigned long pfn = __pa(ent->addr) >> PAGE_SHIFT;
        unsigned long nr_pages = ent->size >> PAGE_SHIFT;
        unsigned long start, end;
        struct kcore_list *vmm, *tmp;

        start = ((unsigned long)pfn_to_page(pfn)) & PAGE_MASK;
        end = ((unsigned long)pfn_to_page(pfn + nr_pages)) - 1;
        end = ALIGN(end, PAGE_SIZE);
        /* overlap check (needed because of the page alignment above) */
        list_for_each_entry(tmp, head, list) {
                if (tmp->type != KCORE_VMEMMAP)
                        continue;
                if (start < tmp->addr + tmp->size)
                        if (end > tmp->addr)
                                end = tmp->addr;
        }
        if (start < end) {
                vmm = kmalloc(sizeof(*vmm), GFP_KERNEL);
                if (!vmm)
                        return 0;
                vmm->addr = start;
                vmm->size = end - start;
                vmm->type = KCORE_VMEMMAP;
                list_add_tail(&vmm->list, head);
        }
        return 1;
}
#else
static int
get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
{
        return 1;
}
#endif

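/*
 * walk_system_ram_range() callback: turn one System RAM block into a
 * KCORE_RAM entry (plus its vmemmap range, if any) on the temporary
 * list.  Returns 0 on success; any non-zero return aborts the walk.
 */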
static int
kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg)
{
        struct list_head *head = (struct list_head *)arg;
        struct kcore_list *ent;

        ent = kmalloc(sizeof(*ent), GFP_KERNEL);
        if (!ent)
                return -ENOMEM;
        ent->addr = (unsigned long)__va((pfn << PAGE_SHIFT));
        ent->size = nr_pages << PAGE_SHIFT;

        /* Sanity check: __va() may wrap below __va(0) on 32-bit arches */
        if (ent->addr < (unsigned long) __va(0))
                goto free_out;

        /* trim any tail that wraps past ULONG_MAX (taken from the ppc32 code) */
        if (ULONG_MAX - ent->addr < ent->size)
                ent->size = ULONG_MAX - ent->addr;

        /* trim where the vmalloc() area sits above the direct-map area */
        if (VMALLOC_START > (unsigned long)__va(0)) {
                if (ent->addr > VMALLOC_START)
                        goto free_out;
                if (VMALLOC_START - ent->addr < ent->size)
                        ent->size = VMALLOC_START - ent->addr;
        }

        ent->type = KCORE_RAM;
        list_add_tail(&ent->list, head);

        if (!get_sparsemem_vmemmap_info(ent, head)) {
                list_del(&ent->list);
                goto free_out;
        }

        return 0;
free_out:
        kfree(ent);
        return 1;
}

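/*
 * Rebuild the KCORE_RAM (and KCORE_VMEMMAP) entries by walking every
 * System RAM range below the highest pfn spanned by any node with
 * memory.
 */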
static int kcore_update_ram(void)
{
        int nid, ret;
        unsigned long end_pfn;
        LIST_HEAD(head);

        /* Not initialized yet: find the highest pfn and update now. */
        end_pfn = 0;
        for_each_node_state(nid, N_MEMORY) {
                unsigned long node_end;
                node_end = NODE_DATA(nid)->node_start_pfn +
                        NODE_DATA(nid)->node_spanned_pages;
                if (end_pfn < node_end)
                        end_pfn = node_end;
        }
        /* scan [0, end_pfn) for System RAM */
        ret = walk_system_ram_range(0, end_pfn, &head, kclist_add_private);
        if (ret) {
                free_kclist_ents(&head);
                return -ENOMEM;
        }
        __kcore_update_ram(&head);
        return ret;
}
#endif /* CONFIG_HIGHMEM */

/*****************************************************************************/
/*
 * determine size of ELF note
 */
static int notesize(struct memelfnote *en)
{
        int sz;

        sz = sizeof(struct elf_note);
        sz += roundup((strlen(en->name) + 1), 4);
        sz += roundup(en->datasz, 4);

        return sz;
} /* end notesize() */

/*****************************************************************************/
/*
 * store a note in the header buffer
 */
static char *storenote(struct memelfnote *men, char *bufp)
{
        struct elf_note en;

#define DUMP_WRITE(addr,nr) do { memcpy(bufp,addr,nr); bufp += nr; } while(0)

        en.n_namesz = strlen(men->name) + 1;
        en.n_descsz = men->datasz;
        en.n_type = men->type;

        DUMP_WRITE(&en, sizeof(en));
        DUMP_WRITE(men->name, en.n_namesz);

        /* XXX - cast from long long to long to avoid need for libgcc.a */
        bufp = (char*) roundup((unsigned long)bufp,4);
        DUMP_WRITE(men->data, men->datasz);
        bufp = (char*) roundup((unsigned long)bufp,4);

#undef DUMP_WRITE

        return bufp;
} /* end storenote() */
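
/*
 * Layout of one note as emitted by storenote(), matching notesize():
 *
 *      struct elf_note { n_namesz, n_descsz, n_type }
 *      name bytes ("CORE\0"), padded to a 4-byte boundary
 *      desc bytes, padded to a 4-byte boundary
 */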

/*
 * store an ELF coredump header in the supplied buffer
 * nphdr is the number of elf_phdr to insert
 */
static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
{
        struct elf_prstatus prstatus;   /* NT_PRSTATUS */
        struct elf_prpsinfo prpsinfo;   /* NT_PRPSINFO */
        struct elf_phdr *nhdr, *phdr;
        struct elfhdr *elf;
        struct memelfnote notes[3];
        off_t offset = 0;
        struct kcore_list *m;

        /* setup ELF header */
        elf = (struct elfhdr *) bufp;
        bufp += sizeof(struct elfhdr);
        offset += sizeof(struct elfhdr);
        memcpy(elf->e_ident, ELFMAG, SELFMAG);
        elf->e_ident[EI_CLASS]  = ELF_CLASS;
        elf->e_ident[EI_DATA]   = ELF_DATA;
        elf->e_ident[EI_VERSION]= EV_CURRENT;
        elf->e_ident[EI_OSABI] = ELF_OSABI;
        memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
        elf->e_type     = ET_CORE;
        elf->e_machine  = ELF_ARCH;
        elf->e_version  = EV_CURRENT;
        elf->e_entry    = 0;
        elf->e_phoff    = sizeof(struct elfhdr);
        elf->e_shoff    = 0;
        elf->e_flags    = ELF_CORE_EFLAGS;
        elf->e_ehsize   = sizeof(struct elfhdr);
        elf->e_phentsize= sizeof(struct elf_phdr);
        elf->e_phnum    = nphdr;
        elf->e_shentsize= 0;
        elf->e_shnum    = 0;
        elf->e_shstrndx = 0;

        /* setup ELF PT_NOTE program header */
        nhdr = (struct elf_phdr *) bufp;
        bufp += sizeof(struct elf_phdr);
        offset += sizeof(struct elf_phdr);
        nhdr->p_type    = PT_NOTE;
        nhdr->p_offset  = 0;
        nhdr->p_vaddr   = 0;
        nhdr->p_paddr   = 0;
        nhdr->p_filesz  = 0;
        nhdr->p_memsz   = 0;
        nhdr->p_flags   = 0;
        nhdr->p_align   = 0;

        /* setup ELF PT_LOAD program header for every area */
        list_for_each_entry(m, &kclist_head, list) {
                phdr = (struct elf_phdr *) bufp;
                bufp += sizeof(struct elf_phdr);
                offset += sizeof(struct elf_phdr);

                phdr->p_type    = PT_LOAD;
                phdr->p_flags   = PF_R|PF_W|PF_X;
                phdr->p_offset  = kc_vaddr_to_offset(m->addr) + dataoff;
                phdr->p_vaddr   = (size_t)m->addr;
                phdr->p_paddr   = 0;
                phdr->p_filesz  = phdr->p_memsz = m->size;
                phdr->p_align   = PAGE_SIZE;
        }

        /*
         * Set up the notes in similar form to SVR4 core dumps made
         * with info from their /proc.
         */
        nhdr->p_offset  = offset;

        /* set up the process status */
        notes[0].name = CORE_STR;
        notes[0].type = NT_PRSTATUS;
        notes[0].datasz = sizeof(struct elf_prstatus);
        notes[0].data = &prstatus;

        memset(&prstatus, 0, sizeof(struct elf_prstatus));

        nhdr->p_filesz  = notesize(&notes[0]);
        bufp = storenote(&notes[0], bufp);

        /* set up the process info */
        notes[1].name   = CORE_STR;
        notes[1].type   = NT_PRPSINFO;
        notes[1].datasz = sizeof(struct elf_prpsinfo);
        notes[1].data   = &prpsinfo;

        memset(&prpsinfo, 0, sizeof(struct elf_prpsinfo));
        prpsinfo.pr_state       = 0;
        prpsinfo.pr_sname       = 'R';
        prpsinfo.pr_zomb        = 0;

        strcpy(prpsinfo.pr_fname, "vmlinux");
        strncpy(prpsinfo.pr_psargs, saved_command_line, ELF_PRARGSZ);

        nhdr->p_filesz  += notesize(&notes[1]);
        bufp = storenote(&notes[1], bufp);

        /* set up the task structure */
        notes[2].name   = CORE_STR;
        notes[2].type   = NT_TASKSTRUCT;
        notes[2].datasz = sizeof(struct task_struct);
        notes[2].data   = current;

        nhdr->p_filesz  += notesize(&notes[2]);
        bufp = storenote(&notes[2], bufp);
} /* end elf_kcore_store_hdr() */
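
/*
 * Resulting file layout, matching the offsets computed above:
 *
 *      elfhdr | PT_NOTE phdr | one PT_LOAD phdr per kclist entry | notes
 *      ... zero padding up to dataoff (== elf_buflen, page aligned) ...
 *      memory image of each area at dataoff + kc_vaddr_to_offset(addr)
 */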

/*****************************************************************************/
/*
 * read from the ELF header and then kernel memory
 */
static ssize_t
read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
{
        ssize_t acc = 0;
        size_t size, tsz;
        size_t elf_buflen;
        int nphdr;
        unsigned long start;

        read_lock(&kclist_lock);
        size = get_kcore_size(&nphdr, &elf_buflen);

        if (buflen == 0 || *fpos >= size) {
                read_unlock(&kclist_lock);
                return 0;
        }

        /* trim buflen so it does not extend beyond EOF */
        if (buflen > size - *fpos)
                buflen = size - *fpos;

        /* construct an ELF core header if we'll need some of it */
        if (*fpos < elf_buflen) {
                char *elf_buf;

                tsz = elf_buflen - *fpos;
                if (buflen < tsz)
                        tsz = buflen;
                elf_buf = kzalloc(elf_buflen, GFP_ATOMIC);
                if (!elf_buf) {
                        read_unlock(&kclist_lock);
                        return -ENOMEM;
                }
                elf_kcore_store_hdr(elf_buf, nphdr, elf_buflen);
                read_unlock(&kclist_lock);
                if (copy_to_user(buffer, elf_buf + *fpos, tsz)) {
                        kfree(elf_buf);
                        return -EFAULT;
                }
                kfree(elf_buf);
                buflen -= tsz;
                *fpos += tsz;
                buffer += tsz;
                acc += tsz;

                /* return now if the buffer is already full */
                if (buflen == 0)
                        return acc;
        } else
                read_unlock(&kclist_lock);

        /*
         * Map the remaining file offset back to a kernel virtual address
         * and copy out at most one page per iteration, dispatching on the
         * kind of registered range the address falls into.
         */
        start = kc_offset_to_vaddr(*fpos - elf_buflen);
        if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen)
                tsz = buflen;

        while (buflen) {
                struct kcore_list *m;

                read_lock(&kclist_lock);
                list_for_each_entry(m, &kclist_head, list) {
                        if (start >= m->addr && start < (m->addr+m->size))
                                break;
                }
                read_unlock(&kclist_lock);

                if (&m->list == &kclist_head) {
                        /* no registered range covers this address: read zeros */
                        if (clear_user(buffer, tsz))
                                return -EFAULT;
                } else if (is_vmalloc_or_module_addr((void *)start)) {
                        char *elf_buf;

                        elf_buf = kzalloc(tsz, GFP_KERNEL);
                        if (!elf_buf)
                                return -ENOMEM;
                        vread(elf_buf, (char *)start, tsz);
                        /*
                         * The bounce buffer was zeroed, so the user sees
                         * zeros for anything vread() could not read; copy
                         * it out regardless.
                         */
                        if (copy_to_user(buffer, elf_buf, tsz)) {
                                kfree(elf_buf);
                                return -EFAULT;
                        }
                        kfree(elf_buf);
                } else {
                        if (kern_addr_valid(start)) {
                                unsigned long n;

                                n = copy_to_user(buffer, (char *)start, tsz);
                                /*
                                 * We cannot distinguish a fault on the
                                 * kernel source from a fault on the user
                                 * destination, so zero the uncopied tail
                                 * and rely on clear_user() to raise
                                 * EFAULT again if the destination was at
                                 * fault.
                                 */
                                if (n) {
                                        if (clear_user(buffer + tsz - n, n))
                                                return -EFAULT;
                                }
                        } else {
                                if (clear_user(buffer, tsz))
                                        return -EFAULT;
                        }
                }
                buflen -= tsz;
                *fpos += tsz;
                buffer += tsz;
                acc += tsz;
                start += tsz;
                tsz = (buflen > PAGE_SIZE ? PAGE_SIZE : buflen);
        }

        return acc;
}

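/*
 * Opening /proc/kcore requires CAP_SYS_RAWIO.  If memory hotplug has
 * invalidated the RAM list, rebuild it, then bring the inode size in
 * line with the current dump size.
 */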
static int open_kcore(struct inode *inode, struct file *filp)
{
        if (!capable(CAP_SYS_RAWIO))
                return -EPERM;
        if (kcore_need_update)
                kcore_update_ram();
        if (i_size_read(inode) != proc_root_kcore->size) {
                mutex_lock(&inode->i_mutex);
                i_size_write(inode, proc_root_kcore->size);
                mutex_unlock(&inode->i_mutex);
        }
        return 0;
}

static const struct file_operations proc_kcore_operations = {
        .read           = read_kcore,
        .open           = open_kcore,
        .llseek         = default_llseek,
};

#ifdef CONFIG_MEMORY_HOTPLUG
/* just remember that we have to update kcore */
static int __meminit kcore_callback(struct notifier_block *self,
                                    unsigned long action, void *arg)
{
        switch (action) {
        case MEM_ONLINE:
        case MEM_OFFLINE:
                write_lock(&kclist_lock);
                kcore_need_update = 1;
                write_unlock(&kclist_lock);
        }
        return NOTIFY_OK;
}
#endif

static struct kcore_list kcore_vmalloc;

#ifdef CONFIG_ARCH_PROC_KCORE_TEXT
static struct kcore_list kcore_text;
/*
 * When CONFIG_ARCH_PROC_KCORE_TEXT is defined, the kernel text is mapped
 * through a dedicated segment rather than the direct-map area, so a
 * separate TEXT entry has to be created for it.
 */
static void __init proc_kcore_text_init(void)
{
        kclist_add(&kcore_text, _text, _end - _text, KCORE_TEXT);
}
#else
static void __init proc_kcore_text_init(void)
{
}
#endif

#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
/*
 * The module area [MODULES_VADDR, MODULES_END) does not intersect the
 * vmalloc area, so it needs its own entry.
 */
struct kcore_list kcore_modules;
static void __init add_modules_range(void)
{
        kclist_add(&kcore_modules, (void *)MODULES_VADDR,
                        MODULES_END - MODULES_VADDR, KCORE_VMALLOC);
}
#else
static void __init add_modules_range(void)
{
}
#endif

static int __init proc_kcore_init(void)
{
        proc_root_kcore = proc_create("kcore", S_IRUSR, NULL,
                                      &proc_kcore_operations);
        if (!proc_root_kcore) {
                pr_err("couldn't create /proc/kcore\n");
                return 0; /* Always returns 0. */
        }
        /* Store text area if it's special */
        proc_kcore_text_init();
        /* Store vmalloc area */
        kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
                VMALLOC_END - VMALLOC_START, KCORE_VMALLOC);
        add_modules_range();
        /* Store direct-map area from physical memory map */
        kcore_update_ram();
        hotplug_memory_notifier(kcore_callback, 0);

        return 0;
}
module_init(proc_kcore_init);