linux/arch/s390/mm/vmem.c
/*
 *    Copyright IBM Corp. 2006
 *    Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
 */

#include <linux/bootmem.h>
#include <linux/pfn.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/list.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/setup.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>

static DEFINE_MUTEX(vmem_mutex);

struct memory_segment {
        struct list_head list;
        unsigned long start;
        unsigned long size;
};

static LIST_HEAD(mem_segs);

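/*
 * Allocate pages for the paging structures: from the page allocator once
 * the slab allocator is available, from bootmem during early boot.
 */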
static void __ref *vmem_alloc_pages(unsigned int order)
{
        if (slab_is_available())
                return (void *)__get_free_pages(GFP_KERNEL, order);
        return alloc_bootmem_pages((1 << order) * PAGE_SIZE);
}

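/* Allocate a four-page (2048 entry) region-third table and mark all entries empty. */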
static inline pud_t *vmem_pud_alloc(void)
{
        pud_t *pud = NULL;

#ifdef CONFIG_64BIT
        pud = vmem_alloc_pages(2);
        if (!pud)
                return NULL;
        clear_table((unsigned long *) pud, _REGION3_ENTRY_EMPTY, PAGE_SIZE * 4);
#endif
        return pud;
}

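/* Allocate a four-page (2048 entry) segment table and mark all entries empty. */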
static inline pmd_t *vmem_pmd_alloc(void)
{
        pmd_t *pmd = NULL;

#ifdef CONFIG_64BIT
        pmd = vmem_alloc_pages(2);
        if (!pmd)
                return NULL;
        clear_table((unsigned long *) pmd, _SEGMENT_ENTRY_EMPTY, PAGE_SIZE * 4);
#endif
        return pmd;
}

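/*
 * Allocate a page table and mark all entries invalid. Once the slab
 * allocator is up the page table allocator of init_mm is used, before
 * that the table comes from bootmem.
 */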
static pte_t __ref *vmem_pte_alloc(unsigned long address)
{
        pte_t *pte;

        if (slab_is_available())
                pte = (pte_t *) page_table_alloc(&init_mm, address);
        else
                pte = alloc_bootmem(PTRS_PER_PTE * sizeof(pte_t));
        if (!pte)
                return NULL;
        clear_table((unsigned long *) pte, _PAGE_TYPE_EMPTY,
                    PTRS_PER_PTE * sizeof(pte_t));
        return pte;
}

/*
 * Add a physical memory range to the 1:1 mapping.
 */
static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
{
        unsigned long end = start + size;
        unsigned long address = start;
        pgd_t *pg_dir;
        pud_t *pu_dir;
        pmd_t *pm_dir;
        pte_t *pt_dir;
        int ret = -ENOMEM;

        while (address < end) {
                pg_dir = pgd_offset_k(address);
                if (pgd_none(*pg_dir)) {
                        pu_dir = vmem_pud_alloc();
                        if (!pu_dir)
                                goto out;
                        pgd_populate(&init_mm, pg_dir, pu_dir);
                }
                pu_dir = pud_offset(pg_dir, address);
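                /*
                 * Map the range with 2GB region-third (pud) frames if the
                 * machine has EDAT2 and the range is suitably aligned.
                 */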
#if defined(CONFIG_64BIT) && !defined(CONFIG_DEBUG_PAGEALLOC)
                if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address &&
                    !(address & ~PUD_MASK) && (address + PUD_SIZE <= end)) {
                        pud_val(*pu_dir) = __pa(address) |
                                _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE |
                                (ro ? _REGION_ENTRY_RO : 0);
                        address += PUD_SIZE;
                        continue;
                }
#endif
                if (pud_none(*pu_dir)) {
                        pm_dir = vmem_pmd_alloc();
                        if (!pm_dir)
                                goto out;
                        pud_populate(&init_mm, pu_dir, pm_dir);
                }
                pm_dir = pmd_offset(pu_dir, address);
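                /*
                 * Map the range with 1MB segment (pmd) frames if the
                 * machine has EDAT1 and the range is suitably aligned.
                 */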
#if defined(CONFIG_64BIT) && !defined(CONFIG_DEBUG_PAGEALLOC)
                if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address &&
                    !(address & ~PMD_MASK) && (address + PMD_SIZE <= end)) {
                        pmd_val(*pm_dir) = __pa(address) |
                                _SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE |
                                (ro ? _SEGMENT_ENTRY_RO : 0);
                        address += PMD_SIZE;
                        continue;
                }
#endif
                if (pmd_none(*pm_dir)) {
                        pt_dir = vmem_pte_alloc(address);
                        if (!pt_dir)
                                goto out;
                        pmd_populate(&init_mm, pm_dir, pt_dir);
                }

                pt_dir = pte_offset_kernel(pm_dir, address);
                pte_val(*pt_dir) = __pa(address) | (ro ? _PAGE_RO : 0);
                address += PAGE_SIZE;
        }
        ret = 0;
out:
        flush_tlb_kernel_range(start, end);
        return ret;
}

/*
 * Remove a physical memory range from the 1:1 mapping.
 * Currently only invalidates page table entries.
 */
static void vmem_remove_range(unsigned long start, unsigned long size)
{
        unsigned long end = start + size;
        unsigned long address = start;
        pgd_t *pg_dir;
        pud_t *pu_dir;
        pmd_t *pm_dir;
        pte_t *pt_dir;
        pte_t  pte;

        pte_val(pte) = _PAGE_TYPE_EMPTY;
        while (address < end) {
                pg_dir = pgd_offset_k(address);
                if (pgd_none(*pg_dir)) {
                        address += PGDIR_SIZE;
                        continue;
                }
                pu_dir = pud_offset(pg_dir, address);
                if (pud_none(*pu_dir)) {
                        address += PUD_SIZE;
                        continue;
                }
                if (pud_large(*pu_dir)) {
                        pud_clear(pu_dir);
                        address += PUD_SIZE;
                        continue;
                }
                pm_dir = pmd_offset(pu_dir, address);
                if (pmd_none(*pm_dir)) {
                        address += PMD_SIZE;
                        continue;
                }
                if (pmd_large(*pm_dir)) {
                        pmd_clear(pm_dir);
                        address += PMD_SIZE;
                        continue;
                }
                pt_dir = pte_offset_kernel(pm_dir, address);
                *pt_dir = pte;
                address += PAGE_SIZE;
        }
        flush_tlb_kernel_range(start, end);
}

/*
 * Add a backed mem_map array to the virtual mem_map array.
 */
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
{
        unsigned long address = start;
        pgd_t *pg_dir;
        pud_t *pu_dir;
        pmd_t *pm_dir;
        pte_t *pt_dir;
        int ret = -ENOMEM;

        for (address = start; address < end;) {
                pg_dir = pgd_offset_k(address);
                if (pgd_none(*pg_dir)) {
                        pu_dir = vmem_pud_alloc();
                        if (!pu_dir)
                                goto out;
                        pgd_populate(&init_mm, pg_dir, pu_dir);
                }

                pu_dir = pud_offset(pg_dir, address);
                if (pud_none(*pu_dir)) {
                        pm_dir = vmem_pmd_alloc();
                        if (!pm_dir)
                                goto out;
                        pud_populate(&init_mm, pu_dir, pm_dir);
                }

                pm_dir = pmd_offset(pu_dir, address);
                if (pmd_none(*pm_dir)) {
#ifdef CONFIG_64BIT
                        /*
                         * Use 1MB frames for vmemmap if available. We always
                         * use large frames, even if they are only partially
                         * used; otherwise we would also have to allocate page
                         * tables, since vmemmap_populate gets called for each
                         * section separately.
                         */
                        if (MACHINE_HAS_EDAT1) {
                                void *new_page;

                                new_page = vmemmap_alloc_block(PMD_SIZE, node);
                                if (!new_page)
                                        goto out;
                                pmd_val(*pm_dir) = __pa(new_page) |
                                        _SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE |
                                        _SEGMENT_ENTRY_CO;
                                address = (address + PMD_SIZE) & PMD_MASK;
                                continue;
                        }
#endif
                        pt_dir = vmem_pte_alloc(address);
                        if (!pt_dir)
                                goto out;
                        pmd_populate(&init_mm, pm_dir, pt_dir);
                } else if (pmd_large(*pm_dir)) {
                        address = (address + PMD_SIZE) & PMD_MASK;
                        continue;
                }

                pt_dir = pte_offset_kernel(pm_dir, address);
                if (pte_none(*pt_dir)) {
                        unsigned long new_page;

                        new_page = __pa(vmem_alloc_pages(0));
                        if (!new_page)
                                goto out;
                        pte_val(*pt_dir) = new_page;
                }
                address += PAGE_SIZE;
        }
        memset((void *)start, 0, end - start);
        ret = 0;
out:
        flush_tlb_kernel_range(start, end);
        return ret;
}

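/*
 * The vmemmap backing is not torn down when memory is removed; this is
 * left as a no-op.
 */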
void vmemmap_free(unsigned long start, unsigned long end)
{
}

/*
 * Add memory segment to the segment list if it doesn't overlap with
 * an already present segment.
 */
static int insert_memory_segment(struct memory_segment *seg)
{
        struct memory_segment *tmp;

        if (seg->start + seg->size > VMEM_MAX_PHYS ||
            seg->start + seg->size < seg->start)
                return -ERANGE;

        list_for_each_entry(tmp, &mem_segs, list) {
                if (seg->start >= tmp->start + tmp->size)
                        continue;
                if (seg->start + seg->size <= tmp->start)
                        continue;
                return -ENOSPC;
        }
        list_add(&seg->list, &mem_segs);
        return 0;
}

/*
 * Remove memory segment from the segment list.
 */
static void remove_memory_segment(struct memory_segment *seg)
{
        list_del(&seg->list);
}

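/*
 * Remove the segment from the segment list and invalidate its 1:1 mapping.
 */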
static void __remove_shared_memory(struct memory_segment *seg)
{
        remove_memory_segment(seg);
        vmem_remove_range(seg->start, seg->size);
}

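/*
 * Remove a previously added mapping. The range must exactly match a
 * registered segment, otherwise -ENOENT is returned.
 */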
int vmem_remove_mapping(unsigned long start, unsigned long size)
{
        struct memory_segment *seg;
        int ret;

        mutex_lock(&vmem_mutex);

        ret = -ENOENT;
        list_for_each_entry(seg, &mem_segs, list) {
                if (seg->start == start && seg->size == size)
                        break;
        }

        if (seg->start != start || seg->size != size)
                goto out;

        ret = 0;
        __remove_shared_memory(seg);
        kfree(seg);
out:
        mutex_unlock(&vmem_mutex);
        return ret;
}

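/*
 * Register a new memory segment for [start, start + size) and add it to
 * the 1:1 mapping.
 */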
int vmem_add_mapping(unsigned long start, unsigned long size)
{
        struct memory_segment *seg;
        int ret;

        mutex_lock(&vmem_mutex);
        ret = -ENOMEM;
        seg = kzalloc(sizeof(*seg), GFP_KERNEL);
        if (!seg)
                goto out;
        seg->start = start;
        seg->size = size;

        ret = insert_memory_segment(seg);
        if (ret)
                goto out_free;

        ret = vmem_add_mem(start, size, 0);
        if (ret)
                goto out_remove;
        goto out;

out_remove:
        __remove_shared_memory(seg);
out_free:
        kfree(seg);
out:
        mutex_unlock(&vmem_mutex);
        return ret;
}

/*
 * Map the whole physical memory into virtual memory (identity mapping).
 * Enough space is reserved in the vmalloc area for the vmemmap so that
 * additional memory segments can be hotplugged later.
 */
void __init vmem_map_init(void)
{
        unsigned long ro_start, ro_end;
        unsigned long start, end;
        int i;

        ro_start = PFN_ALIGN((unsigned long)&_stext);
        ro_end = (unsigned long)&_eshared & PAGE_MASK;
        for (i = 0; i < MEMORY_CHUNKS; i++) {
                if (!memory_chunk[i].size)
                        continue;
                start = memory_chunk[i].addr;
                end = memory_chunk[i].addr + memory_chunk[i].size;
                if (start >= ro_end || end <= ro_start)
                        vmem_add_mem(start, end - start, 0);
                else if (start >= ro_start && end <= ro_end)
                        vmem_add_mem(start, end - start, 1);
                else if (start >= ro_start) {
                        vmem_add_mem(start, ro_end - start, 1);
                        vmem_add_mem(ro_end, end - ro_end, 0);
                } else if (end < ro_end) {
                        vmem_add_mem(start, ro_start - start, 0);
                        vmem_add_mem(ro_start, end - ro_start, 1);
                } else {
                        vmem_add_mem(start, ro_start - start, 0);
                        vmem_add_mem(ro_start, ro_end - ro_start, 1);
                        vmem_add_mem(ro_end, end - ro_end, 0);
                }
        }
}

/*
 * Convert memory chunk array to a memory segment list so there is a single
 * list that contains both r/w memory and shared memory segments.
 */
static int __init vmem_convert_memory_chunk(void)
{
        struct memory_segment *seg;
        int i;

        mutex_lock(&vmem_mutex);
        for (i = 0; i < MEMORY_CHUNKS; i++) {
                if (!memory_chunk[i].size)
                        continue;
                seg = kzalloc(sizeof(*seg), GFP_KERNEL);
                if (!seg)
                        panic("Out of memory...\n");
                seg->start = memory_chunk[i].addr;
                seg->size = memory_chunk[i].size;
                insert_memory_segment(seg);
        }
        mutex_unlock(&vmem_mutex);
        return 0;
}

core_initcall(vmem_convert_memory_chunk);