linux/lib/cpu_rmap.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * cpu_rmap.c: CPU affinity reverse-map support
 * Copyright 2011 Solarflare Communications Inc.
 */

#include <linux/cpu_rmap.h>
#include <linux/interrupt.h>
#include <linux/export.h>

/*
 * These functions maintain a mapping from CPUs to some ordered set of
 * objects with CPU affinities.  This can be seen as a reverse-map of
 * CPU affinity.  However, we do not assume that the object affinities
 * cover all CPUs in the system.  For those CPUs not directly covered
 * by object affinities, we attempt to find a nearest object based on
 * CPU topology.
 */
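
/*
 * Illustrative sketch (added annotation, not in the original file):
 * assuming CPUs {0,1} and {2,3} are SMT sibling pairs and two objects
 * with single-CPU affinities, the map might resolve as
 *
 *     obj 0 affinity = CPU 0, obj 1 affinity = CPU 2
 *     near[0] = { index 0, dist 0 }   direct affinity
 *     near[1] = { index 0, dist 1 }   SMT sibling of CPU 0
 *     near[2] = { index 1, dist 0 }   direct affinity
 *     near[3] = { index 1, dist 1 }   SMT sibling of CPU 2
 *
 * Distances 1, 2 and 3 correspond to SMT-sibling, core and NUMA-node
 * neighbours respectively; see cpu_rmap_update() below.
 */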

/**
 * alloc_cpu_rmap - allocate CPU affinity reverse-map
 * @size: Number of objects to be mapped
 * @flags: Allocation flags e.g. %GFP_KERNEL
 *
 * Return: a new rmap on success, or %NULL on failure.
 */
struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags)
{
        struct cpu_rmap *rmap;
        unsigned int cpu;
        size_t obj_offset;

        /* This is a silly number of objects, and we use u16 indices. */
        if (size > 0xffff)
                return NULL;

        /* Offset of object pointer array from base structure */
        obj_offset = ALIGN(offsetof(struct cpu_rmap, near[nr_cpu_ids]),
                           sizeof(void *));

        rmap = kzalloc(obj_offset + size * sizeof(rmap->obj[0]), flags);
        if (!rmap)
                return NULL;

        kref_init(&rmap->refcount);
        rmap->obj = (void **)((char *)rmap + obj_offset);

        /* Initially assign CPUs to objects on a rota, since we have
         * no idea where the objects are.  Use infinite distance, so
         * any object with known distance is preferable.  Include the
         * CPUs that are not present/online, since we definitely want
         * any newly-hotplugged CPUs to have some object assigned.
         */
        for_each_possible_cpu(cpu) {
                rmap->near[cpu].index = cpu % size;
                rmap->near[cpu].dist = CPU_RMAP_DIST_INF;
        }

        rmap->size = size;
        return rmap;
}
EXPORT_SYMBOL(alloc_cpu_rmap);
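
/*
 * Example (added sketch; nr_queues is a placeholder name, not from
 * this file): a driver that knows its object count up front might
 * allocate the map like this.
 *
 *     struct cpu_rmap *rmap = alloc_cpu_rmap(nr_queues, GFP_KERNEL);
 *
 *     if (!rmap)
 *             return -ENOMEM;
 *     // objects are registered later with cpu_rmap_add()
 */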

/**
 * cpu_rmap_release - internal reclaiming helper called from kref_put
 * @ref: kref to struct cpu_rmap
 */
static void cpu_rmap_release(struct kref *ref)
{
        struct cpu_rmap *rmap = container_of(ref, struct cpu_rmap, refcount);
        kfree(rmap);
}

/**
 * cpu_rmap_get - internal helper to get new ref on a cpu_rmap
 * @rmap: reverse-map allocated with alloc_cpu_rmap()
 */
static inline void cpu_rmap_get(struct cpu_rmap *rmap)
{
        kref_get(&rmap->refcount);
}

/**
 * cpu_rmap_put - release ref on a cpu_rmap
 * @rmap: reverse-map allocated with alloc_cpu_rmap()
 */
int cpu_rmap_put(struct cpu_rmap *rmap)
{
        return kref_put(&rmap->refcount, cpu_rmap_release);
}
EXPORT_SYMBOL(cpu_rmap_put);
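
/*
 * Lifetime note (added annotation): kref_init() in alloc_cpu_rmap()
 * sets the initial reference, so a user that never attaches IRQ
 * notifiers simply pairs alloc_cpu_rmap() with one cpu_rmap_put().
 * Each irq_glue added below takes an extra reference, dropped from
 * irq_cpu_rmap_release() once the IRQ core releases the notifier.
 */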

/* Reevaluate nearest object for given CPU, comparing with the given
 * neighbours at the given distance.
 */
static bool cpu_rmap_copy_neigh(struct cpu_rmap *rmap, unsigned int cpu,
                                const struct cpumask *mask, u16 dist)
{
        int neigh;

        for_each_cpu(neigh, mask) {
                if (rmap->near[cpu].dist > dist &&
                    rmap->near[neigh].dist <= dist) {
                        rmap->near[cpu].index = rmap->near[neigh].index;
                        rmap->near[cpu].dist = dist;
                        return true;
                }
        }
        return false;
}
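
/*
 * Worked example (added annotation): if CPU 5 still has distance
 * CPU_RMAP_DIST_INF and its SMT sibling CPU 4 has distance 0 (direct
 * affinity), calling this with the sibling mask and dist == 1 copies
 * CPU 4's object index to CPU 5 and records distance 1.
 */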

#ifdef DEBUG
static void debug_print_rmap(const struct cpu_rmap *rmap, const char *prefix)
{
        unsigned int index;
        unsigned int cpu;

        pr_info("cpu_rmap %p, %s:\n", rmap, prefix);

        for_each_possible_cpu(cpu) {
                index = rmap->near[cpu].index;
                pr_info("cpu %d -> obj %u (distance %u)\n",
                        cpu, index, rmap->near[cpu].dist);
        }
}
#else
static inline void
debug_print_rmap(const struct cpu_rmap *rmap, const char *prefix)
{
}
#endif

/**
 * cpu_rmap_add - add object to a rmap
 * @rmap: CPU rmap allocated with alloc_cpu_rmap()
 * @obj: Object to add to rmap
 *
 * Return: index of object.
 */
int cpu_rmap_add(struct cpu_rmap *rmap, void *obj)
{
        u16 index;

        BUG_ON(rmap->used >= rmap->size);
        index = rmap->used++;
        rmap->obj[index] = obj;
        return index;
}
EXPORT_SYMBOL(cpu_rmap_add);
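
/*
 * Example (added sketch; queue/nr_queues are placeholder names):
 * registering per-queue objects so that cpu_rmap_lookup_index() or
 * cpu_rmap_lookup_obj() from <linux/cpu_rmap.h> can later map a CPU
 * back to the nearest queue.
 *
 *     for (i = 0; i < nr_queues; i++)
 *             queue[i].rmap_index = cpu_rmap_add(rmap, &queue[i]);
 */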

/**
 * cpu_rmap_update - update CPU rmap following a change of object affinity
 * @rmap: CPU rmap to update
 * @index: Index of object whose affinity changed
 * @affinity: New CPU affinity of object
 *
 * Return: 0 on success, or -ENOMEM if a temporary update mask cannot
 * be allocated.
 */
int cpu_rmap_update(struct cpu_rmap *rmap, u16 index,
                    const struct cpumask *affinity)
{
        cpumask_var_t update_mask;
        unsigned int cpu;

        if (unlikely(!zalloc_cpumask_var(&update_mask, GFP_KERNEL)))
                return -ENOMEM;

        /* Invalidate distance for all CPUs for which this used to be
         * the nearest object.  Mark those CPUs for update.
         */
        for_each_online_cpu(cpu) {
                if (rmap->near[cpu].index == index) {
                        rmap->near[cpu].dist = CPU_RMAP_DIST_INF;
                        cpumask_set_cpu(cpu, update_mask);
                }
        }

        debug_print_rmap(rmap, "after invalidating old distances");

        /* Set distance to 0 for all CPUs in the new affinity mask.
         * Mark all CPUs within their NUMA nodes for update.
         */
        for_each_cpu(cpu, affinity) {
                rmap->near[cpu].index = index;
                rmap->near[cpu].dist = 0;
                cpumask_or(update_mask, update_mask,
                           cpumask_of_node(cpu_to_node(cpu)));
        }

        debug_print_rmap(rmap, "after updating neighbours");

        /* Update distances based on topology */
        for_each_cpu(cpu, update_mask) {
                if (cpu_rmap_copy_neigh(rmap, cpu,
                                        topology_sibling_cpumask(cpu), 1))
                        continue;
                if (cpu_rmap_copy_neigh(rmap, cpu,
                                        topology_core_cpumask(cpu), 2))
                        continue;
                if (cpu_rmap_copy_neigh(rmap, cpu,
                                        cpumask_of_node(cpu_to_node(cpu)), 3))
                        continue;
                /* We could continue into NUMA node distances, but for now
                 * we give up.
                 */
        }

        debug_print_rmap(rmap, "after copying neighbours");

        free_cpumask_var(update_mask);
        return 0;
}
EXPORT_SYMBOL(cpu_rmap_update);
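
/*
 * Example (added sketch; dev/queue/new_mask are placeholder names):
 * a driver that changes an object's affinity refreshes the map
 * afterwards.  A failure only means the map may be stale; the
 * affinity change itself is unaffected.
 *
 *     err = cpu_rmap_update(rmap, queue->rmap_index, new_mask);
 *     if (err)
 *             dev_warn(dev, "cpu_rmap update failed: %d\n", err);
 */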

/* Glue between IRQ affinity notifiers and CPU rmaps */

struct irq_glue {
        struct irq_affinity_notify notify;
        struct cpu_rmap *rmap;
        u16 index;
};

/**
 * free_irq_cpu_rmap - free a CPU affinity reverse-map used for IRQs
 * @rmap: Reverse-map allocated with alloc_irq_cpu_rmap(), or %NULL
 *
 * Must be called in process context, before freeing the IRQs.
 */
void free_irq_cpu_rmap(struct cpu_rmap *rmap)
{
        struct irq_glue *glue;
        u16 index;

        if (!rmap)
                return;

        for (index = 0; index < rmap->used; index++) {
                glue = rmap->obj[index];
                /* Entries can be NULL if irq_cpu_rmap_add() failed
                 * part-way through; skip them.
                 */
                if (glue)
                        irq_set_affinity_notifier(glue->notify.irq, NULL);
        }

        cpu_rmap_put(rmap);
}
EXPORT_SYMBOL(free_irq_cpu_rmap);
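
/*
 * Example teardown order (added sketch; irqs/dev/nr_vecs are
 * placeholder names): the notifiers must be removed before the IRQs
 * themselves are freed.
 *
 *     free_irq_cpu_rmap(rmap);        // unregisters all notifiers
 *     for (i = 0; i < nr_vecs; i++)
 *             free_irq(irqs[i], dev);
 */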

/**
 * irq_cpu_rmap_notify - callback for IRQ subsystem when IRQ affinity updated
 * @notify: struct irq_affinity_notify passed by irq/manage.c
 * @mask: cpu mask for new SMP affinity
 *
 * This is executed in workqueue context.
 */
static void
irq_cpu_rmap_notify(struct irq_affinity_notify *notify, const cpumask_t *mask)
{
        struct irq_glue *glue =
                container_of(notify, struct irq_glue, notify);
        int rc;

        rc = cpu_rmap_update(glue->rmap, glue->index, mask);
        if (rc)
                pr_warn("irq_cpu_rmap_notify: update failed: %d\n", rc);
}

/**
 * irq_cpu_rmap_release - reclaiming callback for IRQ subsystem
 * @ref: kref to struct irq_affinity_notify passed by irq/manage.c
 */
static void irq_cpu_rmap_release(struct kref *ref)
{
        struct irq_glue *glue =
                container_of(ref, struct irq_glue, notify.kref);

        cpu_rmap_put(glue->rmap);
        kfree(glue);
}

/**
 * irq_cpu_rmap_add - add an IRQ to a CPU affinity reverse-map
 * @rmap: The reverse-map
 * @irq: The IRQ number
 *
 * This adds an IRQ affinity notifier that will update the reverse-map
 * automatically.
 *
 * Must be called in process context, after the IRQ is allocated but
 * before it is bound with request_irq().
 *
 * Return: 0 on success, or a negative error code.
 */
int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq)
{
        struct irq_glue *glue = kzalloc(sizeof(*glue), GFP_KERNEL);
        int rc;

        if (!glue)
                return -ENOMEM;
        glue->notify.notify = irq_cpu_rmap_notify;
        glue->notify.release = irq_cpu_rmap_release;
        glue->rmap = rmap;
        cpu_rmap_get(rmap);
        glue->index = cpu_rmap_add(rmap, glue);
        rc = irq_set_affinity_notifier(irq, &glue->notify);
        if (rc) {
                /* Clear the stale slot so free_irq_cpu_rmap() does not
                 * dereference the glue we are about to free.
                 */
                cpu_rmap_put(glue->rmap);
                rmap->obj[glue->index] = NULL;
                kfree(glue);
        }
        return rc;
}
EXPORT_SYMBOL(irq_cpu_rmap_add);
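
/*
 * Putting it together (added sketch; my_handler/dev/irqs/nr_vecs are
 * placeholder names): typical driver setup using the
 * alloc_irq_cpu_rmap() wrapper from <linux/cpu_rmap.h>, which calls
 * alloc_cpu_rmap(size, GFP_KERNEL).
 *
 *     rmap = alloc_irq_cpu_rmap(nr_vecs);
 *     if (!rmap)
 *             return -ENOMEM;
 *     for (i = 0; i < nr_vecs; i++) {
 *             err = irq_cpu_rmap_add(rmap, irqs[i]);
 *             if (err)
 *                     goto fail;
 *             err = request_irq(irqs[i], my_handler, 0, "my_dev", dev);
 *             if (err)
 *                     goto fail;
 *     }
 */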