linux/drivers/infiniband/hw/hfi1/mmu_rb.c
// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
/*
 * Copyright(c) 2020 Cornelis Networks, Inc.
 * Copyright(c) 2016 - 2017 Intel Corporation.
 */

#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/mmu_notifier.h>
#include <linux/interval_tree_generic.h>
#include <linux/sched/mm.h>

#include "mmu_rb.h"
#include "trace.h"

static unsigned long mmu_node_start(struct mmu_rb_node *);
static unsigned long mmu_node_last(struct mmu_rb_node *);
static int mmu_notifier_range_start(struct mmu_notifier *,
                const const struct mmu_notifier_range *);
static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *,
                                           unsigned long, unsigned long);
static void do_remove(struct mmu_rb_handler *handler,
                      struct list_head *del_list);
static void handle_remove(struct work_struct *work);

static const struct mmu_notifier_ops mn_opts = {
        .invalidate_range_start = mmu_notifier_range_start,
};

INTERVAL_TREE_DEFINE(struct mmu_rb_node, node, unsigned long, __last,
                     mmu_node_start, mmu_node_last, static, __mmu_int_rb);

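/*
 * Interval-tree key helpers: a node's start key is the page-aligned base of
 * its range and its last key is the inclusive end of the final page it
 * touches.  As a worked example (assuming a 4 KiB PAGE_SIZE), a node with
 * addr = 0x1234 and len = 0x10 covers [0x1000, 0x1fff]:
 * 0x1234 & PAGE_MASK == 0x1000 and PAGE_ALIGN(0x1234 + 0x10) - 1 == 0x1fff.
 */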
static unsigned long mmu_node_start(struct mmu_rb_node *node)
{
        return node->addr & PAGE_MASK;
}

static unsigned long mmu_node_last(struct mmu_rb_node *node)
{
        return PAGE_ALIGN(node->addr + node->len) - 1;
}

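/*
 * Caller-side registration sketch.  The ops-table field names are the ones
 * used in this file; the "my_" identifiers (my_ctx, my_wq, my_handler and
 * the callbacks) are purely illustrative.  .filter is optional (see
 * __mmu_rb_search()):
 *
 *	static struct mmu_rb_ops my_rb_ops = {
 *		.filter     = my_filter,
 *		.insert     = my_insert,
 *		.evict      = my_evict,
 *		.remove     = my_remove,
 *		.invalidate = my_invalidate,
 *	};
 *
 *	ret = hfi1_mmu_rb_register(my_ctx, &my_rb_ops, my_wq, &my_handler);
 *	if (ret)
 *		return ret;
 *
 * The handler is bound to current->mm through the MMU notifier, and the
 * entry points below reject calls made from any other mm.
 */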
int hfi1_mmu_rb_register(void *ops_arg,
                         struct mmu_rb_ops *ops,
                         struct workqueue_struct *wq,
                         struct mmu_rb_handler **handler)
{
        struct mmu_rb_handler *h;
        int ret;

        h = kzalloc(sizeof(*h), GFP_KERNEL);
        if (!h)
                return -ENOMEM;

        h->root = RB_ROOT_CACHED;
        h->ops = ops;
        h->ops_arg = ops_arg;
        INIT_HLIST_NODE(&h->mn.hlist);
        spin_lock_init(&h->lock);
        h->mn.ops = &mn_opts;
        INIT_WORK(&h->del_work, handle_remove);
        INIT_LIST_HEAD(&h->del_list);
        INIT_LIST_HEAD(&h->lru_list);
        h->wq = wq;

        ret = mmu_notifier_register(&h->mn, current->mm);
        if (ret) {
                kfree(h);
                return ret;
        }

        *handler = h;
        return 0;
}

void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler)
{
        struct mmu_rb_node *rbnode;
        struct rb_node *node;
        unsigned long flags;
        struct list_head del_list;

        /* Unregister first so we don't get any more notifications. */
        mmu_notifier_unregister(&handler->mn, handler->mn.mm);

        /*
         * Make sure the wq delete handler is finished running.  It will not
         * be triggered once the mmu notifiers are unregistered above.
         */
        flush_work(&handler->del_work);

        INIT_LIST_HEAD(&del_list);

        spin_lock_irqsave(&handler->lock, flags);
        while ((node = rb_first_cached(&handler->root))) {
                rbnode = rb_entry(node, struct mmu_rb_node, node);
                rb_erase_cached(node, &handler->root);
                /* move from LRU list to delete list */
                list_move(&rbnode->list, &del_list);
        }
        spin_unlock_irqrestore(&handler->lock, flags);

        do_remove(handler, &del_list);

        kfree(handler);
}

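/*
 * Caller-side insertion sketch (illustrative variable names): fill in the
 * node's range, then hand it to the tree:
 *
 *	node->addr = (unsigned long)vaddr;
 *	node->len = length;
 *	ret = hfi1_mmu_rb_insert(my_handler, node);
 *
 * -EPERM means the call came from an mm other than the registered one,
 * -EINVAL means __mmu_rb_search() found a conflicting node for the range,
 * and any other non-zero value came from the ops->insert() callback, in
 * which case the node is not left in the tree.
 */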
int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler,
                       struct mmu_rb_node *mnode)
{
        struct mmu_rb_node *node;
        unsigned long flags;
        int ret = 0;

        trace_hfi1_mmu_rb_insert(mnode->addr, mnode->len);

        if (current->mm != handler->mn.mm)
                return -EPERM;

        spin_lock_irqsave(&handler->lock, flags);
        node = __mmu_rb_search(handler, mnode->addr, mnode->len);
        if (node) {
                ret = -EINVAL;
                goto unlock;
        }
        __mmu_int_rb_insert(mnode, &handler->root);
        list_add(&mnode->list, &handler->lru_list);

        ret = handler->ops->insert(handler->ops_arg, mnode);
        if (ret) {
                __mmu_int_rb_remove(mnode, &handler->root);
                list_del(&mnode->list); /* remove from LRU list */
        }
        mnode->handler = handler;
unlock:
        spin_unlock_irqrestore(&handler->lock, flags);
        return ret;
}

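/*
 * With no filter op, the search returns the first node overlapping
 * [addr, addr + len - 1].  With a filter op, each overlapping node is
 * offered to the filter and the first one it accepts is returned.  A sketch
 * of a filter that only accepts exact matches, its signature inferred from
 * the call site below:
 *
 *	static bool my_filter(struct mmu_rb_node *node, unsigned long addr,
 *			      unsigned long len)
 *	{
 *		return node->addr == addr && node->len == len;
 *	}
 */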
/* Caller must hold handler lock */
static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler,
                                           unsigned long addr,
                                           unsigned long len)
{
        struct mmu_rb_node *node = NULL;

        trace_hfi1_mmu_rb_search(addr, len);
        if (!handler->ops->filter) {
                node = __mmu_int_rb_iter_first(&handler->root, addr,
                                               (addr + len) - 1);
        } else {
                for (node = __mmu_int_rb_iter_first(&handler->root, addr,
                                                    (addr + len) - 1);
                     node;
                     node = __mmu_int_rb_iter_next(node, addr,
                                                   (addr + len) - 1)) {
                        if (handler->ops->filter(node, addr, len))
                                return node;
                }
        }
        return node;
}

bool hfi1_mmu_rb_remove_unless_exact(struct mmu_rb_handler *handler,
                                     unsigned long addr, unsigned long len,
                                     struct mmu_rb_node **rb_node)
{
        struct mmu_rb_node *node;
        unsigned long flags;
        bool ret = false;

        if (current->mm != handler->mn.mm)
                return ret;

        spin_lock_irqsave(&handler->lock, flags);
        node = __mmu_rb_search(handler, addr, len);
        if (node) {
                if (node->addr == addr && node->len == len)
                        goto unlock;
                __mmu_int_rb_remove(node, &handler->root);
                list_del(&node->list); /* remove from LRU list */
                ret = true;
        }
unlock:
        spin_unlock_irqrestore(&handler->lock, flags);
        *rb_node = node;
        return ret;
}

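/*
 * Eviction walks the LRU list oldest-first and asks the evict op whether
 * each node may be dropped; the op can set *stop to end the scan early,
 * e.g. once enough memory has been reclaimed.  Returning zero keeps the
 * node cached; returning non-zero moves it to a local delete list, and the
 * remove op is called on it after the handler lock is released.  A sketch
 * of an evict callback, its signature inferred from the call site below and
 * the "my_" names and bookkeeping purely illustrative:
 *
 *	static int my_evict(void *ops_arg, struct mmu_rb_node *node,
 *			    void *evict_arg, bool *stop)
 *	{
 *		struct my_evict_data *data = evict_arg;
 *
 *		if (my_node_is_busy(node))
 *			return 0;
 *		data->cleared += node->len;
 *		if (data->cleared >= data->target)
 *			*stop = true;
 *		return 1;
 *	}
 */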
void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg)
{
        struct mmu_rb_node *rbnode, *ptr;
        struct list_head del_list;
        unsigned long flags;
        bool stop = false;

        if (current->mm != handler->mn.mm)
                return;

        INIT_LIST_HEAD(&del_list);

        spin_lock_irqsave(&handler->lock, flags);
        list_for_each_entry_safe_reverse(rbnode, ptr, &handler->lru_list,
                                         list) {
                if (handler->ops->evict(handler->ops_arg, rbnode, evict_arg,
                                        &stop)) {
                        __mmu_int_rb_remove(rbnode, &handler->root);
                        /* move from LRU list to delete list */
                        list_move(&rbnode->list, &del_list);
                }
                if (stop)
                        break;
        }
        spin_unlock_irqrestore(&handler->lock, flags);

        while (!list_empty(&del_list)) {
                rbnode = list_first_entry(&del_list, struct mmu_rb_node, list);
                list_del(&rbnode->list);
                handler->ops->remove(handler->ops_arg, rbnode);
        }
}

/*
 * It is up to the caller to ensure that this function does not race with the
 * mmu invalidate notifier, which may be calling the user's remove callback on
 * 'node'.
 */
void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler,
                        struct mmu_rb_node *node)
{
        unsigned long flags;

        if (current->mm != handler->mn.mm)
                return;

        /* Validity of handler and node pointers has been checked by caller. */
        trace_hfi1_mmu_rb_remove(node->addr, node->len);
        spin_lock_irqsave(&handler->lock, flags);
        __mmu_int_rb_remove(node, &handler->root);
        list_del(&node->list); /* remove from LRU list */
        spin_unlock_irqrestore(&handler->lock, flags);

        handler->ops->remove(handler->ops_arg, node);
}

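/*
 * MMU notifier callback, invoked when a range of the registered mm is about
 * to be invalidated.  Any overlapping node whose invalidate op returns
 * non-zero is pulled out of the tree and parked on handler->del_list; the
 * remove callbacks then run from the workqueue (see handle_remove() below),
 * where mmap_lock is not held and they are free to sleep.
 */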
static int mmu_notifier_range_start(struct mmu_notifier *mn,
                const struct mmu_notifier_range *range)
{
        struct mmu_rb_handler *handler =
                container_of(mn, struct mmu_rb_handler, mn);
        struct rb_root_cached *root = &handler->root;
        struct mmu_rb_node *node, *ptr = NULL;
        unsigned long flags;
        bool added = false;

        spin_lock_irqsave(&handler->lock, flags);
        for (node = __mmu_int_rb_iter_first(root, range->start, range->end-1);
             node; node = ptr) {
                /* Guard against node removal. */
                ptr = __mmu_int_rb_iter_next(node, range->start,
                                             range->end - 1);
                trace_hfi1_mmu_mem_invalidate(node->addr, node->len);
                if (handler->ops->invalidate(handler->ops_arg, node)) {
                        __mmu_int_rb_remove(node, root);
                        /* move from LRU list to delete list */
                        list_move(&node->list, &handler->del_list);
                        added = true;
                }
        }
        spin_unlock_irqrestore(&handler->lock, flags);

        if (added)
                queue_work(handler->wq, &handler->del_work);

        return 0;
}

/*
 * Call the remove function for the given handler and the list.  This
 * is expected to be called with a delete list extracted from handler.
 * The caller should not be holding the handler lock.
 */
static void do_remove(struct mmu_rb_handler *handler,
                      struct list_head *del_list)
{
        struct mmu_rb_node *node;

        while (!list_empty(del_list)) {
                node = list_first_entry(del_list, struct mmu_rb_node, list);
                list_del(&node->list);
                handler->ops->remove(handler->ops_arg, node);
        }
}

/*
 * Work queue function to remove all nodes that have been queued up to
 * be removed.  The key feature is that mm->mmap_lock is not being held
 * and the remove callback can sleep while taking it, if needed.
 */
static void handle_remove(struct work_struct *work)
{
        struct mmu_rb_handler *handler = container_of(work,
                                                struct mmu_rb_handler,
                                                del_work);
        struct list_head del_list;
        unsigned long flags;

        /* remove anything that is queued to get removed */
        spin_lock_irqsave(&handler->lock, flags);
        list_replace_init(&handler->del_list, &del_list);
        spin_unlock_irqrestore(&handler->lock, flags);

        do_remove(handler, &del_list);
}