linux/lib/cpu_rmap.c
/*
 * cpu_rmap.c: CPU affinity reverse-map support
 * Copyright 2011 Solarflare Communications Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */

#include <linux/cpu_rmap.h>
#include <linux/interrupt.h>
#include <linux/export.h>

/*
 * These functions maintain a mapping from CPUs to some ordered set of
 * objects with CPU affinities.  This can be seen as a reverse-map of
 * CPU affinity.  However, we do not assume that the object affinities
 * cover all CPUs in the system.  For those CPUs not directly covered
 * by object affinities, we attempt to find a nearest object based on
 * CPU topology.
 */
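
/*
 * For reference, the map itself is declared in <linux/cpu_rmap.h>
 * (reproduced here as a sketch; see the header for the authoritative
 * definition):
 *
 *	struct cpu_rmap {
 *		struct kref	refcount;
 *		u16		size, used;
 *		void		**obj;
 *		struct {
 *			u16	index;
 *			u16	dist;
 *		}		near[];
 *	};
 *
 * near[cpu].index is an index into obj[] for the nearest object, and
 * near[cpu].dist is a small topological distance (CPU_RMAP_DIST_INF
 * when no nearby object is known).
 */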

/**
 * alloc_cpu_rmap - allocate CPU affinity reverse-map
 * @size: Number of objects to be mapped
 * @flags: Allocation flags e.g. %GFP_KERNEL
 *
 * Return: the new reverse-map, or %NULL if @size is too large or
 * allocation fails.
 */
struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags)
{
        struct cpu_rmap *rmap;
        unsigned int cpu;
        size_t obj_offset;

        /* This is a silly number of objects, and we use u16 indices. */
        if (size > 0xffff)
                return NULL;

        /* Offset of object pointer array from base structure */
        obj_offset = ALIGN(offsetof(struct cpu_rmap, near[nr_cpu_ids]),
                           sizeof(void *));

        rmap = kzalloc(obj_offset + size * sizeof(rmap->obj[0]), flags);
        if (!rmap)
                return NULL;

        kref_init(&rmap->refcount);
        rmap->obj = (void **)((char *)rmap + obj_offset);

        /* Initially assign CPUs to objects on a rota, since we have
         * no idea where the objects are.  Use infinite distance, so
         * any object with known distance is preferable.  Include the
         * CPUs that are not present/online, since we definitely want
         * any newly-hotplugged CPUs to have some object assigned.
         */
        for_each_possible_cpu(cpu) {
                rmap->near[cpu].index = cpu % size;
                rmap->near[cpu].dist = CPU_RMAP_DIST_INF;
        }

        rmap->size = size;
        return rmap;
}
EXPORT_SYMBOL(alloc_cpu_rmap);
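
/*
 * Example lifecycle (a minimal sketch, not taken from a real caller;
 * "nr_queues" is a hypothetical object count):
 *
 *	struct cpu_rmap *rmap = alloc_cpu_rmap(nr_queues, GFP_KERNEL);
 *
 *	if (!rmap)
 *		return -ENOMEM;
 *	...add objects and update affinities...
 *	cpu_rmap_put(rmap);	// drop the allocation's reference
 */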

/**
 * cpu_rmap_release - internal reclaiming helper called from kref_put
 * @ref: kref to struct cpu_rmap
 */
static void cpu_rmap_release(struct kref *ref)
{
        struct cpu_rmap *rmap = container_of(ref, struct cpu_rmap, refcount);
        kfree(rmap);
}

/**
 * cpu_rmap_get - internal helper to get new ref on a cpu_rmap
 * @rmap: reverse-map allocated with alloc_cpu_rmap()
 */
static inline void cpu_rmap_get(struct cpu_rmap *rmap)
{
        kref_get(&rmap->refcount);
}

/**
 * cpu_rmap_put - release ref on a cpu_rmap
 * @rmap: reverse-map allocated with alloc_cpu_rmap()
 *
 * Return: 1 if the map was released and freed, 0 otherwise.
 */
int cpu_rmap_put(struct cpu_rmap *rmap)
{
        return kref_put(&rmap->refcount, cpu_rmap_release);
}
EXPORT_SYMBOL(cpu_rmap_put);

/* Reevaluate nearest object for given CPU, comparing with the given
 * neighbours at the given distance.
 */
static bool cpu_rmap_copy_neigh(struct cpu_rmap *rmap, unsigned int cpu,
                                const struct cpumask *mask, u16 dist)
{
        int neigh;

        for_each_cpu(neigh, mask) {
                if (rmap->near[cpu].dist > dist &&
                    rmap->near[neigh].dist <= dist) {
                        rmap->near[cpu].index = rmap->near[neigh].index;
                        rmap->near[cpu].dist = dist;
                        return true;
                }
        }
        return false;
}
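
/*
 * The distance values form a small ordinal scale, derived from how
 * cpu_rmap_update() calls cpu_rmap_copy_neigh() below: 0 means the CPU
 * is in the object's own affinity mask, 1 an SMT sibling of such a CPU
 * (topology_sibling_cpumask()), 2 a CPU in the same package
 * (topology_core_cpumask()), 3 a CPU on the same NUMA node, and
 * CPU_RMAP_DIST_INF that no nearby object is known.
 */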

#ifdef DEBUG
static void debug_print_rmap(const struct cpu_rmap *rmap, const char *prefix)
{
        unsigned int index;
        unsigned int cpu;

        pr_info("cpu_rmap %p, %s:\n", rmap, prefix);

        for_each_possible_cpu(cpu) {
                index = rmap->near[cpu].index;
                pr_info("cpu %d -> obj %u (distance %u)\n",
                        cpu, index, rmap->near[cpu].dist);
        }
}
#else
static inline void
debug_print_rmap(const struct cpu_rmap *rmap, const char *prefix)
{
}
#endif

/**
 * cpu_rmap_add - add object to a rmap
 * @rmap: CPU rmap allocated with alloc_cpu_rmap()
 * @obj: Object to add to rmap
 *
 * Return: index of object.
 */
int cpu_rmap_add(struct cpu_rmap *rmap, void *obj)
{
        u16 index;

        BUG_ON(rmap->used >= rmap->size);
        index = rmap->used++;
        rmap->obj[index] = obj;
        return index;
}
EXPORT_SYMBOL(cpu_rmap_add);
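
/*
 * A non-IRQ user adds each of its objects once and then feeds affinity
 * changes in via cpu_rmap_update() (a minimal sketch; "obj" and "mask"
 * are placeholders for the caller's object and its new cpumask):
 *
 *	int idx = cpu_rmap_add(rmap, obj);
 *	int rc = cpu_rmap_update(rmap, idx, mask);
 */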

/**
 * cpu_rmap_update - update CPU rmap following a change of object affinity
 * @rmap: CPU rmap to update
 * @index: Index of object whose affinity changed
 * @affinity: New CPU affinity of object
 *
 * Return: 0 on success, or -ENOMEM if a temporary cpumask could not be
 * allocated.
 */
int cpu_rmap_update(struct cpu_rmap *rmap, u16 index,
                    const struct cpumask *affinity)
{
        cpumask_var_t update_mask;
        unsigned int cpu;

        if (unlikely(!zalloc_cpumask_var(&update_mask, GFP_KERNEL)))
                return -ENOMEM;

        /* Invalidate distance for all CPUs for which this used to be
         * the nearest object.  Mark those CPUs for update.
         */
        for_each_online_cpu(cpu) {
                if (rmap->near[cpu].index == index) {
                        rmap->near[cpu].dist = CPU_RMAP_DIST_INF;
                        cpumask_set_cpu(cpu, update_mask);
                }
        }

        debug_print_rmap(rmap, "after invalidating old distances");

        /* Set distance to 0 for all CPUs in the new affinity mask.
         * Mark all CPUs within their NUMA nodes for update.
         */
        for_each_cpu(cpu, affinity) {
                rmap->near[cpu].index = index;
                rmap->near[cpu].dist = 0;
                cpumask_or(update_mask, update_mask,
                           cpumask_of_node(cpu_to_node(cpu)));
        }

        debug_print_rmap(rmap, "after updating neighbours");

        /* Update distances based on topology */
        for_each_cpu(cpu, update_mask) {
                if (cpu_rmap_copy_neigh(rmap, cpu,
                                        topology_sibling_cpumask(cpu), 1))
                        continue;
                if (cpu_rmap_copy_neigh(rmap, cpu,
                                        topology_core_cpumask(cpu), 2))
                        continue;
                if (cpu_rmap_copy_neigh(rmap, cpu,
                                        cpumask_of_node(cpu_to_node(cpu)), 3))
                        continue;
                /* We could continue into NUMA node distances, but for now
                 * we give up.
                 */
        }

        debug_print_rmap(rmap, "after copying neighbours");

        free_cpumask_var(update_mask);
        return 0;
}
EXPORT_SYMBOL(cpu_rmap_update);
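
/*
 * Once the map is up to date, callers typically resolve the current
 * CPU to an object with the inline helpers from <linux/cpu_rmap.h>
 * (a minimal sketch; "rmap" is assumed to be a populated map):
 *
 *	u16 index = cpu_rmap_lookup_index(rmap, raw_smp_processor_id());
 *	void *obj = cpu_rmap_lookup_obj(rmap, raw_smp_processor_id());
 */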

/* Glue between IRQ affinity notifiers and CPU rmaps */

struct irq_glue {
        struct irq_affinity_notify notify;
        struct cpu_rmap *rmap;
        u16 index;
};

/**
 * free_irq_cpu_rmap - free a CPU affinity reverse-map used for IRQs
 * @rmap: Reverse-map allocated with alloc_irq_cpu_rmap(), or %NULL
 *
 * Must be called in process context, before freeing the IRQs.
 */
void free_irq_cpu_rmap(struct cpu_rmap *rmap)
{
        struct irq_glue *glue;
        u16 index;

        if (!rmap)
                return;

        for (index = 0; index < rmap->used; index++) {
                glue = rmap->obj[index];
                /* An entry may be NULL if irq_cpu_rmap_add() failed for it */
                if (glue)
                        irq_set_affinity_notifier(glue->notify.irq, NULL);
        }

        cpu_rmap_put(rmap);
}
EXPORT_SYMBOL(free_irq_cpu_rmap);

/**
 * irq_cpu_rmap_notify - callback for IRQ subsystem when IRQ affinity updated
 * @notify: struct irq_affinity_notify passed by irq/manage.c
 * @mask: cpu mask for new SMP affinity
 *
 * This is executed in workqueue context.
 */
static void
irq_cpu_rmap_notify(struct irq_affinity_notify *notify, const cpumask_t *mask)
{
        struct irq_glue *glue =
                container_of(notify, struct irq_glue, notify);
        int rc;

        rc = cpu_rmap_update(glue->rmap, glue->index, mask);
        if (rc)
                pr_warn("irq_cpu_rmap_notify: update failed: %d\n", rc);
}

/**
 * irq_cpu_rmap_release - reclaiming callback for IRQ subsystem
 * @ref: kref to struct irq_affinity_notify passed by irq/manage.c
 */
static void irq_cpu_rmap_release(struct kref *ref)
{
        struct irq_glue *glue =
                container_of(ref, struct irq_glue, notify.kref);

        cpu_rmap_put(glue->rmap);
        kfree(glue);
}

/**
 * irq_cpu_rmap_add - add an IRQ to a CPU affinity reverse-map
 * @rmap: The reverse-map
 * @irq: The IRQ number
 *
 * This adds an IRQ affinity notifier that will update the reverse-map
 * automatically.
 *
 * Must be called in process context, after the IRQ is allocated but
 * before it is bound with request_irq().
 *
 * Return: 0 on success, or a negative error code on failure.
 */
int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq)
{
        struct irq_glue *glue = kzalloc(sizeof(*glue), GFP_KERNEL);
        int rc;

        if (!glue)
                return -ENOMEM;
        glue->notify.notify = irq_cpu_rmap_notify;
        glue->notify.release = irq_cpu_rmap_release;
        glue->rmap = rmap;
        cpu_rmap_get(rmap);
        glue->index = cpu_rmap_add(rmap, glue);
        rc = irq_set_affinity_notifier(irq, &glue->notify);
        if (rc) {
                cpu_rmap_put(glue->rmap);
                /* Clear the stale entry so it is not dereferenced later,
                 * e.g. by free_irq_cpu_rmap().
                 */
                rmap->obj[glue->index] = NULL;
                kfree(glue);
        }
        return rc;
}
EXPORT_SYMBOL(irq_cpu_rmap_add);
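
/*
 * Typical driver usage (a hedged sketch modelled on multiqueue NIC
 * drivers; "priv", "nr_rx_queues" and "msix_vector()" are hypothetical):
 *
 *	priv->rx_cpu_rmap = alloc_irq_cpu_rmap(nr_rx_queues);
 *	if (!priv->rx_cpu_rmap)
 *		return -ENOMEM;
 *	for (i = 0; i < nr_rx_queues; i++) {
 *		rc = irq_cpu_rmap_add(priv->rx_cpu_rmap,
 *				      msix_vector(priv, i));
 *		if (rc)
 *			goto fail;
 *	}
 *	...request_irq() and run...
 *	free_irq_cpu_rmap(priv->rx_cpu_rmap);	// before free_irq()
 */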