/* linux/lib/gpt.c */
   1/*
   2 * Copyright 2016 Red Hat Inc.
   3 *
   4 * This program is free software; you can redistribute it and/or modify
   5 * it under the terms of the GNU General Public License as published by
   6 * the Free Software Foundation; either version 2 of the License, or
   7 * (at your option) any later version.
   8 *
   9 * This program is distributed in the hope that it will be useful,
  10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 * GNU General Public License for more details.
  13 *
  14 * Authors: Jérôme Glisse <jglisse@redhat.com>
  15 */
  16/*
  17 * Generic page table structure with adjustable depth. For details refer to
  18 * include/linux/gpt.h
  19 */
  20#include <linux/mm.h>
  21#include <linux/gpt.h>
  22#include <linux/slab.h>
  23#include <linux/highmem.h>
  24
  25#ifdef CONFIG_HIGHMEM64G
  26/* Some arch do not define MAX_PHYSMEM_BITS */
  27#ifndef MAX_PHYSMEM_BITS
  28#define MAX_PHYSMEM_BITS 36
  29#endif /* MAX_PHYSMEM_BITS */
  30#define GTE_SHIFT 3
  31#define GTE_BITS 64
/* GTD_SHIFT - Shift for one directory level index */
  33#else /* CONFIG_HIGHMEM64G */
  34/* Some arch do not define MAX_PHYSMEM_BITS */
  35#ifndef MAX_PHYSMEM_BITS
  36#define MAX_PHYSMEM_BITS BITS_PER_LONG
  37#endif /* MAX_PHYSMEM_BITS */
  38#if BITS_PER_LONG == 32
  39#define GTE_SHIFT 2
  40#define GTE_BITS 32
  41#else /* This must be 64 McFly ! */
  42#define GTE_SHIFT 3
  43#define GTE_BITS 64
  44#endif
  45#endif /* CONFIG_HIGHMEM64G */
  46
  47#define GPT_DEFAULT_GFP (GFP_KERNEL | __GFP_ZERO | __GFP_HIGHMEM)
  48
  49
/*
 * GPT_PFN_BITS - Number of bits required to store any valid pfn value
 * GPT_PFN_MASK - Mask for pfn value
 *
 * This means that there are (BITS_PER_LONG - GPT_PFN_BITS) bits that can be
 * used in any way by the end user of the GPT struct.
 */
  57#define GPT_PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT)
  58#define GPT_PFN_MASK ((1UL << GPT_PFN_BITS) - 1)
  59
/*
 * GTD_SHIFT - Shift for one directory level index
 * GTE_PER_GTD - Number of page table entries in one directory level
 * GTD_MASK - Mask for a directory level index
 */
  64#define GTD_SHIFT (PAGE_SHIFT - GTE_SHIFT)
  65#define GTE_PER_GTD (1UL << GTD_SHIFT)
  66#define GTD_MASK (GTE_PER_GTD - 1)
  67
  68
  69struct gpt *gpt_alloc(unsigned long start,
  70                      unsigned long end,
  71                      uint8_t shift,
  72                      uint8_t valid_bit)
  73{
  74        unsigned long ngd, tmp;
  75        struct gpt *gpt;
  76
  77        /* Sanity checks */
  78        start &= PAGE_MASK;
  79        end &= PAGE_MASK;
  80        BUG_ON(start >= end);
  81        BUG_ON(valid_bit >= shift);
  82        BUG_ON((GTE_BITS - shift) < GPT_PFN_BITS);
  83
  84        gpt = kmalloc(sizeof(*gpt), GFP_KERNEL);
  85        if (!gpt)
  86                return NULL;
  87
  88        gpt->start = start;
  89        gpt->end = end;
  90        gpt->nlevels = 0;
  91        gpt->shift = shift;
  92        gpt->valid_bit = valid_bit;
  93        spin_lock_init(&gpt->lock);
  94        atomic_set(&gpt->refcount, 0);
  95
  96        ngd = (end - start) >> PAGE_SHIFT;
  97        tmp = (ngd) >> GTD_SHIFT;
  98        while (tmp) {
  99                ngd = tmp;
 100                tmp = tmp >> GTD_SHIFT;
 101                gpt->nlevels++;
 102        }
 103        BUG_ON(gpt->nlevels >= GPT_MAX_LEVEL);
 104
 105        gpt->gdp = kzalloc(ngd * sizeof(gte_t), GFP_KERNEL);
 106        if (!gpt->gdp) {
 107                kfree(gpt);
 108                return NULL;
 109        }
 110
 111        return gpt;
 112}
 113EXPORT_SYMBOL(gpt_alloc);
 114
 115void gpt_free(struct gpt *gpt)
 116{
 117        struct gpt_walk walk;
 118
 119        if (!gpt)
 120                return;
 121
 122        gpt_walk_init(&walk, gpt);
 123        gpt_walk_prune(&walk, gpt->start, gpt->end);
 124        gpt_walk_fini(&walk);
 125
 126        kfree(gpt->gdp);
 127        kfree(gpt);
 128}
 129EXPORT_SYMBOL(gpt_free);
 130
 131
 132static inline unsigned long gte_to_pfn(struct gpt_walk *walk, gte_t gte)
 133{
 134        return (unsigned long)(gte >> walk->gpt->shift) & GPT_PFN_MASK;
 135}
 136
 137static inline struct page *gte_to_page(struct gpt_walk *walk, gte_t gte)
 138{
 139        if (!(gte & (1UL << walk->gpt->valid_bit)))
 140                return NULL;
 141        return pfn_to_page(gte_to_pfn(walk, gte));
 142}
 143
 144static inline unsigned gpt_index(struct gpt_walk *walk,
 145                                 unsigned long addr,
 146                                 unsigned level)
 147{
 148        unsigned shift;
 149
 150        shift = GTD_SHIFT * level + PAGE_SHIFT;
 151        return ((addr - walk->gpt->start) >> shift) & GTD_MASK;
 152}
 153
 154static inline unsigned long gpt_level_start(struct gpt_walk *walk,
 155                                            unsigned long addr,
 156                                            unsigned level)
 157{
 158        unsigned long mask;
 159        unsigned shift;
 160
 161        shift = GTD_SHIFT * (level + 1) + PAGE_SHIFT;
 162        mask = ~((1UL << shift) - 1);
 163        return ((addr - walk->gpt->start) & mask) + walk->gpt->start;
 164}
 165
 166static inline unsigned long gpt_level_end(struct gpt_walk *walk,
 167                                          unsigned long addr,
 168                                          unsigned level)
 169{
 170        unsigned long mask;
 171        unsigned shift;
 172
 173        shift = GTD_SHIFT * (level + 1) + PAGE_SHIFT;
 174        mask = (1UL << shift) - 1;
 175        return ((addr - walk->gpt->start) | mask) + walk->gpt->start + 1;
 176}
 177
 178
 179/* gpt_walk_init() - Init gpt walk structure
 180 * @walk: walk structure to initialize
 181 */
 182void gpt_walk_init(struct gpt_walk *walk, struct gpt *gpt)
 183{
 184        unsigned level;
 185
 186        BUG_ON(!gpt);
 187
 188        walk->gpt = gpt;
 189        walk->gtd[walk->gpt->nlevels] = NULL;
 190        walk->gte[walk->gpt->nlevels] = gpt->gdp;
 191        walk->start = gpt->start;
 192        walk->end = gpt->end;
 193
 194        for (level = 0; level < walk->gpt->nlevels; level++) {
 195                walk->gtd[level] = NULL;
 196                walk->gte[level] = NULL;
 197        }
 198}
 199EXPORT_SYMBOL(gpt_walk_init);
 200
 201/* gpt_walk_fini() - Finalize gpt walk structure
 202 * @walk: walk structure to finalize
 203 *
 204 * This unmap any mapped directory.
 205 */
 206void gpt_walk_fini(struct gpt_walk *walk)
 207{
 208        unsigned level;
 209
 210        for (level = 0; level < walk->gpt->nlevels; ++level) {
 211                if (!walk->gtd[level])
 212                        continue;
 213                kunmap(walk->gtd[level]);
 214                atomic_dec(gpt_walk_gtd_refcount(walk, level));
 215                walk->gtd[level] = NULL;
 216                walk->gte[level] = NULL;
 217        }
 218        walk->start = walk->gpt->start;
 219        walk->end = walk->gpt->end;
 220}
 221EXPORT_SYMBOL(gpt_walk_fini);
 222
/* gpt_walk_gtep_from_addr() - Get entry pointer for a given address
 * @walk: walk structure used to walk the generic page table
 * @addr: address of interest
 * Returns: NULL if address is not in range or if there is no directory
 *
 * This will return a pointer to the page table entry if a directory exists
 * for the given address. Directories mapped by a previous call are reused
 * when @addr falls inside the cached [walk->start, walk->end) window;
 * otherwise the walk first unwinds (unmapping levels) up to a level that
 * covers @addr, then re-descends toward level 0.
 */
gte_t *gpt_walk_gtep_from_addr(struct gpt_walk *walk, unsigned long addr)
{
        unsigned l;

        if (addr < walk->gpt->start || addr >= walk->gpt->end)
                return NULL;

again:
        /* Fast path: lowest level directory is mapped and covers @addr. */
        if (walk->gte[0] && addr >= walk->start && addr < walk->end)
                return &walk->gte[0][gpt_index(walk, addr, 0)];

        /* Unwind: unmap cached levels until one covers @addr. */
        for (l = 0; l < walk->gpt->nlevels; l++) {
                if (!walk->gtd[l])
                        continue;

                if (addr >= walk->start && addr < walk->end)
                        break;

                kunmap(walk->gtd[l]);
                atomic_dec(gpt_walk_gtd_refcount(walk, l));
                walk->gtd[l] = NULL;
                walk->gte[l] = NULL;
                /* Compute start and end address of upper level */
                walk->start = gpt_level_start(walk, walk->start, l + 1);
                walk->end = gpt_level_end(walk, walk->start, l + 1);
        }

        /* Descend from level l back down to 0, mapping each directory. */
        for (; l; l--) {
                unsigned idx;
                atomic_t *refcount;

                idx = gpt_index(walk, addr, l);
                /* Lock the upper level so the entry cannot be pruned while
                 * we take a reference on the directory it points to. */
                spin_lock(gpt_walk_gtd_lock_ptr(walk, l));
                walk->gtd[l - 1] = gte_to_page(walk, walk->gte[l][idx]);
                refcount = gpt_walk_gtd_refcount(walk, l - 1);
                /* NOTE(review): presumably refcount is NULL when no
                 * directory page exists at this level — verify against
                 * gpt_walk_gtd_refcount() in include/linux/gpt.h. */
                if (refcount)
                        atomic_inc(refcount);
                else
                        walk->gtd[l - 1] = NULL;
                spin_unlock(gpt_walk_gtd_lock_ptr(walk, l));
                if (!walk->gtd[l- 1])
                        return NULL;

                walk->gte[l - 1] = kmap(walk->gtd[l - 1]);

                /* Compute start and end address of lower level */
                walk->start = gpt_level_start(walk, addr, l - 1);
                walk->end = gpt_level_end(walk, addr, l - 1);
        }

        /* At this point all gtd levels are mapped; retake the fast path. */
        goto again;
}
EXPORT_SYMBOL(gpt_walk_gtep_from_addr);
 285
/* gpt_walk_populate() - Populate page table directory tree for given address
 * @walk: walk structure used to walk the generic page table
 * @addr: address of interest
 * Returns: pointer to the lowest level directory entry, or NULL if a
 * directory page could not be allocated or its page table lock could not
 * be initialized
 *
 * This will populate all directory levels that are missing for a given
 * address and it returns a pointer to the lowest directory level.
 *
 * NOTE(review): per the warning on gpt_walk_prune(), the caller appears
 * responsible for serializing populate against concurrent prune — confirm.
 */
gte_t *gpt_walk_populate(struct gpt_walk *walk, unsigned long addr)
{
        unsigned idx;
        gte_t *gtep;
        int level;

        /* Fast path: every directory level already exists for @addr. */
        if ((gtep = gpt_walk_gtep_from_addr(walk, addr)))
                return gtep;

        /* Allocate missing directories from the top level downward. */
        for (level = walk->gpt->nlevels - 1; level >= 0; level--) {
                unsigned long pfn;

                if (walk->gtd[level])
                        continue;

                walk->gtd[level] = alloc_page(GPT_DEFAULT_GFP);
                if (!walk->gtd[level])
                        return NULL;
                /* Set up the split page table lock for this directory. */
                if (!ptlock_init(walk->gtd[level])) {
                        __free_page(walk->gtd[level]);
                        walk->gtd[level] = NULL;
                        return NULL;
                }
                pfn = page_to_pfn(walk->gtd[level]);

                /* Initialize new directory; _mapcount seems to double as
                 * the directory's reference count (gpt_page_reset() resets
                 * it to -1 before the page is freed) — verify. */
                atomic_set(&walk->gtd[level]->_mapcount, 1);

                /* Compute start and end address of current level */
                walk->start = gpt_level_start(walk, addr, level);
                walk->end = gpt_level_end(walk, addr, level);
                walk->gte[level] = kmap(walk->gtd[level]);

                /* Set directory entry in upper level: pfn in the high bits,
                 * valid bit set, published under the upper level's lock. */
                idx = gpt_index(walk, addr, level + 1);
                atomic_inc(gpt_walk_gtd_refcount(walk, level + 1));
                spin_lock(gpt_walk_gtd_lock_ptr(walk, level + 1));
                walk->gte[level + 1][idx] = pfn << walk->gpt->shift;
                walk->gte[level + 1][idx] |= 1UL << walk->gpt->valid_bit;
                spin_unlock(gpt_walk_gtd_lock_ptr(walk, level + 1));
        }

        idx = gpt_index(walk, addr, 0);
        return &walk->gte[0][idx];
}
EXPORT_SYMBOL(gpt_walk_populate);
 340
/* gpt_page_reset() - Reset struct page fields used by gpt to a sane value
 * @page: page that is being pruned from the directory tree and needs reset
 *
 * We use a few fields inside the struct page to store information for each
 * directory in the tree. We need to reset some of those fields to a sane
 * value so that core mm does not freak out when we free a directory page.
 *
 * This should be called by the prune callback provided to gpt_prune() when
 * it decides to free a directory level. The default callback,
 * gpt_prune_default(), properly calls this function.
 */
static inline void gpt_page_reset(struct page *page)
{
        /* Undo the _mapcount initialization done when the directory page
         * was populated; -1 is the value core mm expects on a free page. */
        atomic_set(&page->_mapcount, -1);
}
 356
/* gpt_walk_prune() - Prune page table directory tree for given address range
 * @walk: walk structure used to walk the generic page table
 * @start: range start address
 * @end: range end address
 *
 * This will prune the directory tree for the given address range. Any empty
 * directory (refcount down to 1, i.e. only the walk's own reference) is
 * detached from its upper level.
 *
 * NOTE(review): the detached directory page itself is not visibly freed
 * here (no __free_page()/put_page() in this function) — either it is freed
 * through a path outside this file, or this leaks; verify against the
 * prune callback machinery mentioned in gpt_page_reset()'s comment.
 *
 * WARNING YOU ARE RESPONSIBLE FOR LOCKING IN RESPECT TO CONCURRENT PAGE TABLE
 * PRUNING OR POPULATING !
 */
void gpt_walk_prune(struct gpt_walk *walk,
                    unsigned long start,
                    unsigned long end)
{
        unsigned long addr;

        start &= PAGE_MASK;
        end &= PAGE_MASK;
        BUG_ON(start >= end);

        for (addr = start; addr < end;) {
                unsigned long next;
                unsigned l;

                /* By default advance by one lowest-level directory span. */
                next = min(end, gpt_level_end(walk, addr, 0));
                gpt_walk_gtep_from_addr(walk, addr);
                for (l = 0; l < walk->gpt->nlevels; l++) {
                        unsigned idx;

                        /* Hole at this level: skip its whole span. */
                        if (!walk->gtd[l]) {
                                next = min(end, gpt_level_end(walk, addr, l));
                                continue;
                        }

                        /* Still referenced beyond the walk itself: the
                         * directory is not empty, stop ascending. */
                        if (atomic_read(gpt_walk_gtd_refcount(walk, l)) != 1)
                                break;

                        /* First unmap and update the walk structure */
                        kunmap(walk->gtd[l]);
                        walk->gtd[l] = NULL;
                        walk->gte[l] = NULL;

                        /* Clear the directory entry in the upper level,
                         * under the upper level's lock, then drop the
                         * reference this directory held on its parent. */
                        idx = gpt_index(walk, walk->start, l + 1);
                        spin_lock(gpt_walk_gtd_lock_ptr(walk, l + 1));
                        walk->gte[l + 1][idx] = 0;
                        spin_unlock(gpt_walk_gtd_lock_ptr(walk, l + 1));
                        atomic_dec(gpt_walk_gtd_refcount(walk, l + 1));

                        /* The next address is end address of current level */
                        next = min(end, walk->end);

                        /* Start and end address are now for the upper level */
                        walk->start = gpt_level_start(walk, addr, l + 1);
                        walk->end = gpt_level_end(walk, addr, l + 1);
                }
                addr = next;
        }
}
EXPORT_SYMBOL(gpt_walk_prune);
 418
/* gpt_walk_range() - Invoke a callback over every populated leaf directory
 * @walk: walk structure used to walk the generic page table
 * @start: range start address
 * @end: range end address (exclusive)
 * @cb: callback invoked for each lowest-level directory chunk in the range
 * @private: opaque pointer passed through to @cb
 * Returns: 0 on success, or the first non-zero value returned by @cb
 *
 * Holes (addresses with no directory) are skipped one level-span at a time.
 * NOTE(review): addresses outside [gpt->start, gpt->end) make
 * gpt_walk_gtep_from_addr() return NULL without any gtd level mapped; the
 * skip computation then runs on an out-of-range address — callers should
 * keep [start, end) within the table's range; verify intended contract.
 */
int gpt_walk_range(struct gpt_walk *walk,
                   unsigned long start,
                   unsigned long end,
                   gpt_walk_cb_t cb,
                   void *private)
{
        unsigned long addr;

        for (addr = start; addr < end;) {
                unsigned long next;
                spinlock_t *gtl;
                gte_t *gtep;
                int ret;

                gtep = gpt_walk_gtep_from_addr(walk, addr);
                if (!gtep) {
                        unsigned l;

                        /* No leaf here: advance past the lowest level that
                         * exists above the hole (or the top-most span). */
                        for (l = 0; l < walk->gpt->nlevels; l++) {
                                if (walk->gtd[l])
                                        break;
                                addr = min(end, gpt_level_end(walk, addr, l));
                        }
                        continue;
                }

                /* Hand the callback one leaf directory span at a time,
                 * together with that directory's page table lock. */
                next = min(end, gpt_level_end(walk, addr, 0));
                gtl = gpt_walk_gtd_lock_ptr(walk, 0);
                ret = cb(walk, addr, next, gtl, gtep, private);
                if (ret)
                        return ret;
                addr = next;
        }

        return 0;
}
EXPORT_SYMBOL(gpt_walk_range);
 456