linux/drivers/infiniband/hw/hfi1/mmu_rb.c
// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
/*
 * Copyright(c) 2020 Cornelis Networks, Inc.
 * Copyright(c) 2016 - 2017 Intel Corporation.
 */

#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/mmu_notifier.h>
#include <linux/interval_tree_generic.h>
#include <linux/sched/mm.h>

#include "mmu_rb.h"
#include "trace.h"

static unsigned long mmu_node_start(struct mmu_rb_node *);
static unsigned long mmu_node_last(struct mmu_rb_node *);
static int mmu_notifier_range_start(struct mmu_notifier *,
                const struct mmu_notifier_range *);
static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *,
                                           unsigned long, unsigned long);
static void do_remove(struct mmu_rb_handler *handler,
                      struct list_head *del_list);
static void handle_remove(struct work_struct *work);

static const struct mmu_notifier_ops mn_opts = {
        .invalidate_range_start = mmu_notifier_range_start,
};

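/*
 * Generate the static interval tree helpers used below
 * (__mmu_int_rb_insert(), __mmu_int_rb_remove(), __mmu_int_rb_iter_first(),
 * __mmu_int_rb_iter_next()).  Nodes are indexed by the page-aligned
 * [start, last] range computed from mmu_rb_node->addr and ->len by
 * mmu_node_start() and mmu_node_last().
 */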
INTERVAL_TREE_DEFINE(struct mmu_rb_node, node, unsigned long, __last,
                     mmu_node_start, mmu_node_last, static, __mmu_int_rb);

static unsigned long mmu_node_start(struct mmu_rb_node *node)
{
        return node->addr & PAGE_MASK;
}

static unsigned long mmu_node_last(struct mmu_rb_node *node)
{
        return PAGE_ALIGN(node->addr + node->len) - 1;
}

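/*
 * hfi1_mmu_rb_register() - allocate a handler and register it as an MMU
 * notifier on current->mm.
 *
 * @ops_arg is passed back to every @ops callback and @wq runs the deferred
 * remove work.  On success *@handler is set and 0 is returned; on failure a
 * negative errno is returned and nothing is left allocated.
 *
 * Illustrative caller flow (names are placeholders, not from this file):
 *
 *        ret = hfi1_mmu_rb_register(priv, &my_rb_ops, my_wq, &handler);
 *        if (!ret) {
 *                ...
 *                hfi1_mmu_rb_unregister(handler);
 *        }
 */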
int hfi1_mmu_rb_register(void *ops_arg,
                         struct mmu_rb_ops *ops,
                         struct workqueue_struct *wq,
                         struct mmu_rb_handler **handler)
{
        struct mmu_rb_handler *h;
        int ret;

        h = kzalloc(sizeof(*h), GFP_KERNEL);
        if (!h)
                return -ENOMEM;

        h->root = RB_ROOT_CACHED;
        h->ops = ops;
        h->ops_arg = ops_arg;
        INIT_HLIST_NODE(&h->mn.hlist);
        spin_lock_init(&h->lock);
        h->mn.ops = &mn_opts;
        INIT_WORK(&h->del_work, handle_remove);
        INIT_LIST_HEAD(&h->del_list);
        INIT_LIST_HEAD(&h->lru_list);
        h->wq = wq;

        ret = mmu_notifier_register(&h->mn, current->mm);
        if (ret) {
                kfree(h);
                return ret;
        }

        *handler = h;
        return 0;
}

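/*
 * hfi1_mmu_rb_unregister() - tear down a handler created by
 * hfi1_mmu_rb_register().
 *
 * Unregisters the MMU notifier, flushes any pending deferred-remove work,
 * then unlinks every remaining node and calls ops->remove on each before
 * freeing the handler.  The mm is held with mmgrab()/mmdrop() for the
 * duration of the teardown.
 */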
void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler)
{
        struct mmu_rb_node *rbnode;
        struct rb_node *node;
        unsigned long flags;
        struct list_head del_list;

        /* Prevent freeing of mm until we are completely finished. */
        mmgrab(handler->mn.mm);

        /* Unregister first so we don't get any more notifications. */
        mmu_notifier_unregister(&handler->mn, handler->mn.mm);

        /*
         * Make sure the wq delete handler is finished running.  It will not
         * be triggered once the mmu notifiers are unregistered above.
         */
        flush_work(&handler->del_work);

        INIT_LIST_HEAD(&del_list);

        spin_lock_irqsave(&handler->lock, flags);
        while ((node = rb_first_cached(&handler->root))) {
                rbnode = rb_entry(node, struct mmu_rb_node, node);
                rb_erase_cached(node, &handler->root);
                /* move from LRU list to delete list */
                list_move(&rbnode->list, &del_list);
        }
        spin_unlock_irqrestore(&handler->lock, flags);

        do_remove(handler, &del_list);

        /* Now the mm may be freed. */
        mmdrop(handler->mn.mm);

        kfree(handler);
}

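/*
 * hfi1_mmu_rb_insert() - add @mnode to the interval tree and to the head of
 * the LRU list.
 *
 * Returns -EPERM when called from an mm other than the one the handler was
 * registered on, -EINVAL if __mmu_rb_search() already finds a matching node,
 * and otherwise the return value of ops->insert.  If ops->insert fails, the
 * node is unlinked from the tree and LRU list again.
 */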
int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler,
                       struct mmu_rb_node *mnode)
{
        struct mmu_rb_node *node;
        unsigned long flags;
        int ret = 0;

        trace_hfi1_mmu_rb_insert(mnode->addr, mnode->len);

        if (current->mm != handler->mn.mm)
                return -EPERM;

        spin_lock_irqsave(&handler->lock, flags);
        node = __mmu_rb_search(handler, mnode->addr, mnode->len);
        if (node) {
                ret = -EINVAL;
                goto unlock;
        }
        __mmu_int_rb_insert(mnode, &handler->root);
        list_add(&mnode->list, &handler->lru_list);

        ret = handler->ops->insert(handler->ops_arg, mnode);
        if (ret) {
                __mmu_int_rb_remove(mnode, &handler->root);
                list_del(&mnode->list); /* remove from LRU list */
        }
        mnode->handler = handler;
unlock:
        spin_unlock_irqrestore(&handler->lock, flags);
        return ret;
}

/*
 * Caller must hold handler->lock.  Returns the first node overlapping
 * [addr, addr + len - 1]; when ops->filter is set, the first overlapping
 * node that the filter accepts.  Returns NULL if there is no match.
 */
static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler,
                                           unsigned long addr,
                                           unsigned long len)
{
        struct mmu_rb_node *node = NULL;

        trace_hfi1_mmu_rb_search(addr, len);
        if (!handler->ops->filter) {
                node = __mmu_int_rb_iter_first(&handler->root, addr,
                                               (addr + len) - 1);
        } else {
                for (node = __mmu_int_rb_iter_first(&handler->root, addr,
                                                    (addr + len) - 1);
                     node;
                     node = __mmu_int_rb_iter_next(node, addr,
                                                   (addr + len) - 1)) {
                        if (handler->ops->filter(node, addr, len))
                                return node;
                }
        }
        return node;
}

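/*
 * hfi1_mmu_rb_remove_unless_exact() - search for @addr/@len and, unless the
 * match has exactly the same addr and len, unlink it from the tree and the
 * LRU list.
 *
 * Returns true only when a non-exact match was unlinked, in which case the
 * caller owns the node; *@rb_node is set to whatever the search found.
 * ops->remove is not called here.  Does nothing when called from a foreign
 * mm.
 */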
bool hfi1_mmu_rb_remove_unless_exact(struct mmu_rb_handler *handler,
                                     unsigned long addr, unsigned long len,
                                     struct mmu_rb_node **rb_node)
{
        struct mmu_rb_node *node;
        unsigned long flags;
        bool ret = false;

        if (current->mm != handler->mn.mm)
                return ret;

        spin_lock_irqsave(&handler->lock, flags);
        node = __mmu_rb_search(handler, addr, len);
        if (node) {
                if (node->addr == addr && node->len == len)
                        goto unlock;
                __mmu_int_rb_remove(node, &handler->root);
                list_del(&node->list); /* remove from LRU list */
                ret = true;
        }
unlock:
        spin_unlock_irqrestore(&handler->lock, flags);
        *rb_node = node;
        return ret;
}

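/*
 * hfi1_mmu_rb_evict() - walk the LRU list from its oldest (tail) end,
 * letting ops->evict decide per node whether it may be evicted, until
 * ops->evict sets @stop.
 *
 * Nodes accepted for eviction are unlinked under the handler lock and then
 * passed to ops->remove after the lock is dropped.  @evict_arg is forwarded
 * to ops->evict unchanged.
 */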
void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg)
{
        struct mmu_rb_node *rbnode, *ptr;
        struct list_head del_list;
        unsigned long flags;
        bool stop = false;

        if (current->mm != handler->mn.mm)
                return;

        INIT_LIST_HEAD(&del_list);

        spin_lock_irqsave(&handler->lock, flags);
        list_for_each_entry_safe_reverse(rbnode, ptr, &handler->lru_list,
                                         list) {
                if (handler->ops->evict(handler->ops_arg, rbnode, evict_arg,
                                        &stop)) {
                        __mmu_int_rb_remove(rbnode, &handler->root);
                        /* move from LRU list to delete list */
                        list_move(&rbnode->list, &del_list);
                }
                if (stop)
                        break;
        }
        spin_unlock_irqrestore(&handler->lock, flags);

        while (!list_empty(&del_list)) {
                rbnode = list_first_entry(&del_list, struct mmu_rb_node, list);
                list_del(&rbnode->list);
                handler->ops->remove(handler->ops_arg, rbnode);
        }
}

/*
 * It is up to the caller to ensure that this function does not race with the
 * mmu invalidate notifier which may be calling the user's remove callback on
 * 'node'.
 */
void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler,
                        struct mmu_rb_node *node)
{
        unsigned long flags;

        if (current->mm != handler->mn.mm)
                return;

        /* Validity of handler and node pointers has been checked by caller. */
        trace_hfi1_mmu_rb_remove(node->addr, node->len);
        spin_lock_irqsave(&handler->lock, flags);
        __mmu_int_rb_remove(node, &handler->root);
        list_del(&node->list); /* remove from LRU list */
        spin_unlock_irqrestore(&handler->lock, flags);

        handler->ops->remove(handler->ops_arg, node);
}

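/*
 * mmu_notifier_range_start() - invalidate_range_start callback for mn_opts.
 *
 * For every node overlapping the invalidated range, ops->invalidate decides
 * whether the node must go; if so it is unlinked from the tree and moved to
 * handler->del_list.  The actual ops->remove calls are deferred to
 * handler->wq (see handle_remove()) so that the remove callback may sleep,
 * e.g. to take mm->mmap_lock.
 */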
static int mmu_notifier_range_start(struct mmu_notifier *mn,
                const struct mmu_notifier_range *range)
{
        struct mmu_rb_handler *handler =
                container_of(mn, struct mmu_rb_handler, mn);
        struct rb_root_cached *root = &handler->root;
        struct mmu_rb_node *node, *ptr = NULL;
        unsigned long flags;
        bool added = false;

        spin_lock_irqsave(&handler->lock, flags);
        for (node = __mmu_int_rb_iter_first(root, range->start, range->end-1);
             node; node = ptr) {
                /* Guard against node removal. */
                ptr = __mmu_int_rb_iter_next(node, range->start,
                                             range->end - 1);
                trace_hfi1_mmu_mem_invalidate(node->addr, node->len);
                if (handler->ops->invalidate(handler->ops_arg, node)) {
                        __mmu_int_rb_remove(node, root);
                        /* move from LRU list to delete list */
                        list_move(&node->list, &handler->del_list);
                        added = true;
                }
        }
        spin_unlock_irqrestore(&handler->lock, flags);

        if (added)
                queue_work(handler->wq, &handler->del_work);

        return 0;
}

/*
 * Call the remove function for the given handler and the list.  This
 * is expected to be called with a delete list extracted from handler.
 * The caller should not be holding the handler lock.
 */
static void do_remove(struct mmu_rb_handler *handler,
                      struct list_head *del_list)
{
        struct mmu_rb_node *node;

        while (!list_empty(del_list)) {
                node = list_first_entry(del_list, struct mmu_rb_node, list);
                list_del(&node->list);
                handler->ops->remove(handler->ops_arg, node);
        }
}

/*
 * Work queue function to remove all nodes that have been queued up to
 * be removed.  The key feature is that mm->mmap_lock is not being held
 * and the remove callback can sleep while taking it, if needed.
 */
static void handle_remove(struct work_struct *work)
{
        struct mmu_rb_handler *handler = container_of(work,
                                                struct mmu_rb_handler,
                                                del_work);
        struct list_head del_list;
        unsigned long flags;

        /* remove anything that is queued to get removed */
        spin_lock_irqsave(&handler->lock, flags);
        list_replace_init(&handler->del_list, &del_list);
        spin_unlock_irqrestore(&handler->lock, flags);

        do_remove(handler, &del_list);
}