linux/drivers/dax/kmem.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/* Copyright(c) 2016-2019 Intel Corporation. All rights reserved. */
   3#include <linux/memremap.h>
   4#include <linux/pagemap.h>
   5#include <linux/memory.h>
   6#include <linux/module.h>
   7#include <linux/device.h>
   8#include <linux/pfn_t.h>
   9#include <linux/slab.h>
  10#include <linux/dax.h>
  11#include <linux/fs.h>
  12#include <linux/mm.h>
  13#include <linux/mman.h>
  14#include "dax-private.h"
  15#include "bus.h"
  16
  17/* Memory resource name used for add_memory_driver_managed(). */
  18static const char *kmem_name;
  19/* Set if any memory will remain added when the driver will be unloaded. */
  20static bool any_hotremove_failed;
  21
  22static int dax_kmem_range(struct dev_dax *dev_dax, int i, struct range *r)
  23{
  24        struct dev_dax_range *dax_range = &dev_dax->ranges[i];
  25        struct range *range = &dax_range->range;
  26
  27        /* memory-block align the hotplug range */
  28        r->start = ALIGN(range->start, memory_block_size_bytes());
  29        r->end = ALIGN_DOWN(range->end + 1, memory_block_size_bytes()) - 1;
  30        if (r->start >= r->end) {
  31                r->start = range->start;
  32                r->end = range->end;
  33                return -ENOSPC;
  34        }
  35        return 0;
  36}
  37
  38struct dax_kmem_data {
  39        const char *res_name;
  40        int mgid;
  41        struct resource *res[];
  42};
  43
  44static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
  45{
  46        struct device *dev = &dev_dax->dev;
  47        unsigned long total_len = 0;
  48        struct dax_kmem_data *data;
  49        int i, rc, mapped = 0;
  50        int numa_node;
  51
  52        /*
  53         * Ensure good NUMA information for the persistent memory.
  54         * Without this check, there is a risk that slow memory
  55         * could be mixed in a node with faster memory, causing
  56         * unavoidable performance issues.
  57         */
  58        numa_node = dev_dax->target_node;
  59        if (numa_node < 0) {
  60                dev_warn(dev, "rejecting DAX region with invalid node: %d\n",
  61                                numa_node);
  62                return -EINVAL;
  63        }
  64
  65        for (i = 0; i < dev_dax->nr_range; i++) {
  66                struct range range;
  67
  68                rc = dax_kmem_range(dev_dax, i, &range);
  69                if (rc) {
  70                        dev_info(dev, "mapping%d: %#llx-%#llx too small after alignment\n",
  71                                        i, range.start, range.end);
  72                        continue;
  73                }
  74                total_len += range_len(&range);
  75        }
  76
  77        if (!total_len) {
  78                dev_warn(dev, "rejecting DAX region without any memory after alignment\n");
  79                return -EINVAL;
  80        }
  81
  82        data = kzalloc(struct_size(data, res, dev_dax->nr_range), GFP_KERNEL);
  83        if (!data)
  84                return -ENOMEM;
  85
  86        rc = -ENOMEM;
  87        data->res_name = kstrdup(dev_name(dev), GFP_KERNEL);
  88        if (!data->res_name)
  89                goto err_res_name;
  90
  91        rc = memory_group_register_static(numa_node, total_len);
  92        if (rc < 0)
  93                goto err_reg_mgid;
  94        data->mgid = rc;
  95
  96        for (i = 0; i < dev_dax->nr_range; i++) {
  97                struct resource *res;
  98                struct range range;
  99
 100                rc = dax_kmem_range(dev_dax, i, &range);
 101                if (rc)
 102                        continue;
 103
 104                /* Region is permanently reserved if hotremove fails. */
 105                res = request_mem_region(range.start, range_len(&range), data->res_name);
 106                if (!res) {
 107                        dev_warn(dev, "mapping%d: %#llx-%#llx could not reserve region\n",
 108                                        i, range.start, range.end);
 109                        /*
 110                         * Once some memory has been onlined we can't
 111                         * assume that it can be un-onlined safely.
 112                         */
 113                        if (mapped)
 114                                continue;
 115                        rc = -EBUSY;
 116                        goto err_request_mem;
 117                }
 118                data->res[i] = res;
 119
 120                /*
 121                 * Set flags appropriate for System RAM.  Leave ..._BUSY clear
 122                 * so that add_memory() can add a child resource.  Do not
 123                 * inherit flags from the parent since it may set new flags
 124                 * unknown to us that will break add_memory() below.
 125                 */
 126                res->flags = IORESOURCE_SYSTEM_RAM;
 127
 128                /*
 129                 * Ensure that future kexec'd kernels will not treat
 130                 * this as RAM automatically.
 131                 */
 132                rc = add_memory_driver_managed(data->mgid, range.start,
 133                                range_len(&range), kmem_name, MHP_NID_IS_MGID);
 134
 135                if (rc) {
 136                        dev_warn(dev, "mapping%d: %#llx-%#llx memory add failed\n",
 137                                        i, range.start, range.end);
 138                        release_resource(res);
 139                        kfree(res);
 140                        data->res[i] = NULL;
 141                        if (mapped)
 142                                continue;
 143                        goto err_request_mem;
 144                }
 145                mapped++;
 146        }
 147
 148        dev_set_drvdata(dev, data);
 149
 150        return 0;
 151
 152err_request_mem:
 153        memory_group_unregister(data->mgid);
 154err_reg_mgid:
 155        kfree(data->res_name);
 156err_res_name:
 157        kfree(data);
 158        return rc;
 159}
 160
 161#ifdef CONFIG_MEMORY_HOTREMOVE
 162static void dev_dax_kmem_remove(struct dev_dax *dev_dax)
 163{
 164        int i, success = 0;
 165        struct device *dev = &dev_dax->dev;
 166        struct dax_kmem_data *data = dev_get_drvdata(dev);
 167
 168        /*
 169         * We have one shot for removing memory, if some memory blocks were not
 170         * offline prior to calling this function remove_memory() will fail, and
 171         * there is no way to hotremove this memory until reboot because device
 172         * unbind will succeed even if we return failure.
 173         */
 174        for (i = 0; i < dev_dax->nr_range; i++) {
 175                struct range range;
 176                int rc;
 177
 178                rc = dax_kmem_range(dev_dax, i, &range);
 179                if (rc)
 180                        continue;
 181
 182                rc = remove_memory(range.start, range_len(&range));
 183                if (rc == 0) {
 184                        release_resource(data->res[i]);
 185                        kfree(data->res[i]);
 186                        data->res[i] = NULL;
 187                        success++;
 188                        continue;
 189                }
 190                any_hotremove_failed = true;
 191                dev_err(dev,
 192                        "mapping%d: %#llx-%#llx cannot be hotremoved until the next reboot\n",
 193                                i, range.start, range.end);
 194        }
 195
 196        if (success >= dev_dax->nr_range) {
 197                memory_group_unregister(data->mgid);
 198                kfree(data->res_name);
 199                kfree(data);
 200                dev_set_drvdata(dev, NULL);
 201        }
 202}
 203#else
 204static void dev_dax_kmem_remove(struct dev_dax *dev_dax)
 205{
 206        /*
 207         * Without hotremove purposely leak the request_mem_region() for the
 208         * device-dax range and return '0' to ->remove() attempts. The removal
 209         * of the device from the driver always succeeds, but the region is
 210         * permanently pinned as reserved by the unreleased
 211         * request_mem_region().
 212         */
 213        any_hotremove_failed = true;
 214}
 215#endif /* CONFIG_MEMORY_HOTREMOVE */
 216
 217static struct dax_device_driver device_dax_kmem_driver = {
 218        .probe = dev_dax_kmem_probe,
 219        .remove = dev_dax_kmem_remove,
 220};
 221
 222static int __init dax_kmem_init(void)
 223{
 224        int rc;
 225
 226        /* Resource name is permanently allocated if any hotremove fails. */
 227        kmem_name = kstrdup_const("System RAM (kmem)", GFP_KERNEL);
 228        if (!kmem_name)
 229                return -ENOMEM;
 230
 231        rc = dax_driver_register(&device_dax_kmem_driver);
 232        if (rc)
 233                kfree_const(kmem_name);
 234        return rc;
 235}
 236
 237static void __exit dax_kmem_exit(void)
 238{
 239        dax_driver_unregister(&device_dax_kmem_driver);
 240        if (!any_hotremove_failed)
 241                kfree_const(kmem_name);
 242}
 243
 244MODULE_AUTHOR("Intel Corporation");
 245MODULE_LICENSE("GPL v2");
 246module_init(dax_kmem_init);
 247module_exit(dax_kmem_exit);
 248MODULE_ALIAS_DAX_DEVICE(0);
 249