linux/drivers/acpi/numa/hmat.c
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (c) 2019, Intel Corporation.
   4 *
   5 * Heterogeneous Memory Attributes Table (HMAT) representation
   6 *
   7 * This program parses and reports the platform's HMAT tables, and registers
   8 * the applicable attributes with the node's interfaces.
   9 */
  10
  11#define pr_fmt(fmt) "acpi/hmat: " fmt
  12#define dev_fmt(fmt) "acpi/hmat: " fmt
  13
  14#include <linux/acpi.h>
  15#include <linux/bitops.h>
  16#include <linux/device.h>
  17#include <linux/init.h>
  18#include <linux/list.h>
  19#include <linux/mm.h>
  20#include <linux/platform_device.h>
  21#include <linux/list_sort.h>
  22#include <linux/memregion.h>
  23#include <linux/memory.h>
  24#include <linux/mutex.h>
  25#include <linux/node.h>
  26#include <linux/sysfs.h>
  27
  28static u8 hmat_revision;
  29
  30static LIST_HEAD(targets);
  31static LIST_HEAD(initiators);
  32static LIST_HEAD(localities);
  33
  34static DEFINE_MUTEX(target_lock);
  35
  36/*
  37 * The defined enum order is used to prioritize attributes to break ties when
  38 * selecting the best performing node.
  39 */
  40enum locality_types {
  41        WRITE_LATENCY,
  42        READ_LATENCY,
  43        WRITE_BANDWIDTH,
  44        READ_BANDWIDTH,
  45};
  46
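/*
 * Locality structure that supplies each attribute, indexed in
 * enum locality_types order; filled in by hmat_add_locality().
 */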
  47static struct memory_locality *localities_types[4];
  48
  49struct target_cache {
  50        struct list_head node;
  51        struct node_cache_attrs cache_attrs;
  52};
  53
  54struct memory_target {
  55        struct list_head node;
  56        unsigned int memory_pxm;
  57        unsigned int processor_pxm;
  58        struct resource memregions;
  59        struct node_hmem_attrs hmem_attrs;
  60        struct list_head caches;
  61        struct node_cache_attrs cache_attrs;
  62        bool registered;
  63};
  64
  65struct memory_initiator {
  66        struct list_head node;
  67        unsigned int processor_pxm;
  68};
  69
  70struct memory_locality {
  71        struct list_head node;
  72        struct acpi_hmat_locality *hmat_loc;
  73};
  74
  75static struct memory_initiator *find_mem_initiator(unsigned int cpu_pxm)
  76{
  77        struct memory_initiator *initiator;
  78
  79        list_for_each_entry(initiator, &initiators, node)
  80                if (initiator->processor_pxm == cpu_pxm)
  81                        return initiator;
  82        return NULL;
  83}
  84
  85static struct memory_target *find_mem_target(unsigned int mem_pxm)
  86{
  87        struct memory_target *target;
  88
  89        list_for_each_entry(target, &targets, node)
  90                if (target->memory_pxm == mem_pxm)
  91                        return target;
  92        return NULL;
  93}
  94
  95static __init void alloc_memory_initiator(unsigned int cpu_pxm)
  96{
  97        struct memory_initiator *initiator;
  98
  99        if (pxm_to_node(cpu_pxm) == NUMA_NO_NODE)
 100                return;
 101
 102        initiator = find_mem_initiator(cpu_pxm);
 103        if (initiator)
 104                return;
 105
 106        initiator = kzalloc(sizeof(*initiator), GFP_KERNEL);
 107        if (!initiator)
 108                return;
 109
 110        initiator->processor_pxm = cpu_pxm;
 111        list_add_tail(&initiator->node, &initiators);
 112}
 113
 114static __init void alloc_memory_target(unsigned int mem_pxm,
 115                resource_size_t start, resource_size_t len)
 116{
 117        struct memory_target *target;
 118
 119        target = find_mem_target(mem_pxm);
 120        if (!target) {
 121                target = kzalloc(sizeof(*target), GFP_KERNEL);
 122                if (!target)
 123                        return;
 124                target->memory_pxm = mem_pxm;
 125                target->processor_pxm = PXM_INVAL;
 126                target->memregions = (struct resource) {
 127                        .name   = "ACPI mem",
 128                        .start  = 0,
 129                        .end    = -1,
 130                        .flags  = IORESOURCE_MEM,
 131                };
 132                list_add_tail(&target->node, &targets);
 133                INIT_LIST_HEAD(&target->caches);
 134        }
 135
 136        /*
 137         * There are potentially multiple ranges per PXM, so record each
 138         * in the per-target memregions resource tree.
 139         */
 140        if (!__request_region(&target->memregions, start, len, "memory target",
 141                                IORESOURCE_MEM))
 142                pr_warn("failed to reserve %#llx - %#llx in pxm: %d\n",
 143                                start, start + len, mem_pxm);
 144}
 145
 146static __init const char *hmat_data_type(u8 type)
 147{
 148        switch (type) {
 149        case ACPI_HMAT_ACCESS_LATENCY:
 150                return "Access Latency";
 151        case ACPI_HMAT_READ_LATENCY:
 152                return "Read Latency";
 153        case ACPI_HMAT_WRITE_LATENCY:
 154                return "Write Latency";
 155        case ACPI_HMAT_ACCESS_BANDWIDTH:
 156                return "Access Bandwidth";
 157        case ACPI_HMAT_READ_BANDWIDTH:
 158                return "Read Bandwidth";
 159        case ACPI_HMAT_WRITE_BANDWIDTH:
 160                return "Write Bandwidth";
 161        default:
 162                return "Reserved";
 163        }
 164}
 165
 166static __init const char *hmat_data_type_suffix(u8 type)
 167{
 168        switch (type) {
 169        case ACPI_HMAT_ACCESS_LATENCY:
 170        case ACPI_HMAT_READ_LATENCY:
 171        case ACPI_HMAT_WRITE_LATENCY:
 172                return " nsec";
 173        case ACPI_HMAT_ACCESS_BANDWIDTH:
 174        case ACPI_HMAT_READ_BANDWIDTH:
 175        case ACPI_HMAT_WRITE_BANDWIDTH:
 176                return " MB/s";
 177        default:
 178                return "";
 179        }
 180}
 181
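/*
 * Convert a raw HMAT matrix entry into nanoseconds (latency) or MB/s
 * (bandwidth).  An entry of 0 or 0xffff carries no usable data, and any
 * product that would overflow a u32 is likewise reported as 0.  For
 * example, with revision 2 latencies in picoseconds, entry=5 and
 * base=100 gives 500 ps, which rounds up to 1 ns.
 */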
 182static u32 hmat_normalize(u16 entry, u64 base, u8 type)
 183{
 184        u32 value;
 185
 186        /*
 187         * Check for invalid and overflow values
 188         */
 189        if (entry == 0xffff || !entry)
 190                return 0;
 191        else if (base > (UINT_MAX / (entry)))
 192                return 0;
 193
 194        /*
 195         * Divide by the base unit for version 1, convert latency from
  196         * picoseconds to nanoseconds if revision 2.
 197         */
 198        value = entry * base;
 199        if (hmat_revision == 1) {
 200                if (value < 10)
 201                        return 0;
 202                value = DIV_ROUND_UP(value, 10);
 203        } else if (hmat_revision == 2) {
 204                switch (type) {
 205                case ACPI_HMAT_ACCESS_LATENCY:
 206                case ACPI_HMAT_READ_LATENCY:
 207                case ACPI_HMAT_WRITE_LATENCY:
 208                        value = DIV_ROUND_UP(value, 1000);
 209                        break;
 210                default:
 211                        break;
 212                }
 213        }
 214        return value;
 215}
 216
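/*
 * Record a normalized value in the target's performance attributes.
 * "Access" data types cover both directions, so they update the read
 * and write members at once.
 */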
 217static void hmat_update_target_access(struct memory_target *target,
 218                                             u8 type, u32 value)
 219{
 220        switch (type) {
 221        case ACPI_HMAT_ACCESS_LATENCY:
 222                target->hmem_attrs.read_latency = value;
 223                target->hmem_attrs.write_latency = value;
 224                break;
 225        case ACPI_HMAT_READ_LATENCY:
 226                target->hmem_attrs.read_latency = value;
 227                break;
 228        case ACPI_HMAT_WRITE_LATENCY:
 229                target->hmem_attrs.write_latency = value;
 230                break;
 231        case ACPI_HMAT_ACCESS_BANDWIDTH:
 232                target->hmem_attrs.read_bandwidth = value;
 233                target->hmem_attrs.write_bandwidth = value;
 234                break;
 235        case ACPI_HMAT_READ_BANDWIDTH:
 236                target->hmem_attrs.read_bandwidth = value;
 237                break;
 238        case ACPI_HMAT_WRITE_BANDWIDTH:
 239                target->hmem_attrs.write_bandwidth = value;
 240                break;
 241        default:
 242                break;
 243        }
 244}
 245
 246static __init void hmat_add_locality(struct acpi_hmat_locality *hmat_loc)
 247{
 248        struct memory_locality *loc;
 249
 250        loc = kzalloc(sizeof(*loc), GFP_KERNEL);
 251        if (!loc) {
 252                pr_notice_once("Failed to allocate HMAT locality\n");
 253                return;
 254        }
 255
 256        loc->hmat_loc = hmat_loc;
 257        list_add_tail(&loc->node, &localities);
 258
 259        switch (hmat_loc->data_type) {
 260        case ACPI_HMAT_ACCESS_LATENCY:
 261                localities_types[READ_LATENCY] = loc;
 262                localities_types[WRITE_LATENCY] = loc;
 263                break;
 264        case ACPI_HMAT_READ_LATENCY:
 265                localities_types[READ_LATENCY] = loc;
 266                break;
 267        case ACPI_HMAT_WRITE_LATENCY:
 268                localities_types[WRITE_LATENCY] = loc;
 269                break;
 270        case ACPI_HMAT_ACCESS_BANDWIDTH:
 271                localities_types[READ_BANDWIDTH] = loc;
 272                localities_types[WRITE_BANDWIDTH] = loc;
 273                break;
 274        case ACPI_HMAT_READ_BANDWIDTH:
 275                localities_types[READ_BANDWIDTH] = loc;
 276                break;
 277        case ACPI_HMAT_WRITE_BANDWIDTH:
 278                localities_types[WRITE_BANDWIDTH] = loc;
 279                break;
 280        default:
 281                break;
 282        }
 283}
 284
 285static __init int hmat_parse_locality(union acpi_subtable_headers *header,
 286                                      const unsigned long end)
 287{
 288        struct acpi_hmat_locality *hmat_loc = (void *)header;
 289        struct memory_target *target;
 290        unsigned int init, targ, total_size, ipds, tpds;
 291        u32 *inits, *targs, value;
 292        u16 *entries;
 293        u8 type, mem_hier;
 294
 295        if (hmat_loc->header.length < sizeof(*hmat_loc)) {
 296                pr_notice("HMAT: Unexpected locality header length: %u\n",
 297                         hmat_loc->header.length);
 298                return -EINVAL;
 299        }
 300
 301        type = hmat_loc->data_type;
 302        mem_hier = hmat_loc->flags & ACPI_HMAT_MEMORY_HIERARCHY;
 303        ipds = hmat_loc->number_of_initiator_Pds;
 304        tpds = hmat_loc->number_of_target_Pds;
 305        total_size = sizeof(*hmat_loc) + sizeof(*entries) * ipds * tpds +
 306                     sizeof(*inits) * ipds + sizeof(*targs) * tpds;
 307        if (hmat_loc->header.length < total_size) {
 308                pr_notice("HMAT: Unexpected locality header length:%u, minimum required:%u\n",
 309                         hmat_loc->header.length, total_size);
 310                return -EINVAL;
 311        }
 312
 313        pr_info("HMAT: Locality: Flags:%02x Type:%s Initiator Domains:%u Target Domains:%u Base:%lld\n",
 314                hmat_loc->flags, hmat_data_type(type), ipds, tpds,
 315                hmat_loc->entry_base_unit);
 316
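        /*
         * The locality header is followed by "ipds" initiator PXMs,
         * then "tpds" target PXMs, then a u16 matrix of entries indexed
         * by [initiator * tpds + target].
         */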
 317        inits = (u32 *)(hmat_loc + 1);
 318        targs = inits + ipds;
 319        entries = (u16 *)(targs + tpds);
 320        for (init = 0; init < ipds; init++) {
 321                alloc_memory_initiator(inits[init]);
 322                for (targ = 0; targ < tpds; targ++) {
 323                        value = hmat_normalize(entries[init * tpds + targ],
 324                                               hmat_loc->entry_base_unit,
 325                                               type);
 326                        pr_info("  Initiator-Target[%u-%u]:%u%s\n",
 327                                inits[init], targs[targ], value,
 328                                hmat_data_type_suffix(type));
 329
 330                        if (mem_hier == ACPI_HMAT_MEMORY) {
 331                                target = find_mem_target(targs[targ]);
 332                                if (target && target->processor_pxm == inits[init])
 333                                        hmat_update_target_access(target, type, value);
 334                        }
 335                }
 336        }
 337
 338        if (mem_hier == ACPI_HMAT_MEMORY)
 339                hmat_add_locality(hmat_loc);
 340
 341        return 0;
 342}
 343
 344static __init int hmat_parse_cache(union acpi_subtable_headers *header,
 345                                   const unsigned long end)
 346{
 347        struct acpi_hmat_cache *cache = (void *)header;
 348        struct memory_target *target;
 349        struct target_cache *tcache;
 350        u32 attrs;
 351
 352        if (cache->header.length < sizeof(*cache)) {
 353                pr_notice("HMAT: Unexpected cache header length: %u\n",
 354                         cache->header.length);
 355                return -EINVAL;
 356        }
 357
 358        attrs = cache->cache_attributes;
 359        pr_info("HMAT: Cache: Domain:%u Size:%llu Attrs:%08x SMBIOS Handles:%d\n",
 360                cache->memory_PD, cache->cache_size, attrs,
 361                cache->number_of_SMBIOShandles);
 362
 363        target = find_mem_target(cache->memory_PD);
 364        if (!target)
 365                return 0;
 366
 367        tcache = kzalloc(sizeof(*tcache), GFP_KERNEL);
 368        if (!tcache) {
 369                pr_notice_once("Failed to allocate HMAT cache info\n");
 370                return 0;
 371        }
 372
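        /*
         * cache_attributes packs the cache level, associativity, write
         * policy and line size into a single u32; the masks and shifts
         * below pull out the individual ACPI-defined fields.
         */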
 373        tcache->cache_attrs.size = cache->cache_size;
 374        tcache->cache_attrs.level = (attrs & ACPI_HMAT_CACHE_LEVEL) >> 4;
 375        tcache->cache_attrs.line_size = (attrs & ACPI_HMAT_CACHE_LINE_SIZE) >> 16;
 376
 377        switch ((attrs & ACPI_HMAT_CACHE_ASSOCIATIVITY) >> 8) {
 378        case ACPI_HMAT_CA_DIRECT_MAPPED:
 379                tcache->cache_attrs.indexing = NODE_CACHE_DIRECT_MAP;
 380                break;
 381        case ACPI_HMAT_CA_COMPLEX_CACHE_INDEXING:
 382                tcache->cache_attrs.indexing = NODE_CACHE_INDEXED;
 383                break;
 384        case ACPI_HMAT_CA_NONE:
 385        default:
 386                tcache->cache_attrs.indexing = NODE_CACHE_OTHER;
 387                break;
 388        }
 389
 390        switch ((attrs & ACPI_HMAT_WRITE_POLICY) >> 12) {
 391        case ACPI_HMAT_CP_WB:
 392                tcache->cache_attrs.write_policy = NODE_CACHE_WRITE_BACK;
 393                break;
 394        case ACPI_HMAT_CP_WT:
 395                tcache->cache_attrs.write_policy = NODE_CACHE_WRITE_THROUGH;
 396                break;
 397        case ACPI_HMAT_CP_NONE:
 398        default:
 399                tcache->cache_attrs.write_policy = NODE_CACHE_WRITE_OTHER;
 400                break;
 401        }
 402        list_add_tail(&tcache->node, &target->caches);
 403
 404        return 0;
 405}
 406
 407static int __init hmat_parse_proximity_domain(union acpi_subtable_headers *header,
 408                                              const unsigned long end)
 409{
 410        struct acpi_hmat_proximity_domain *p = (void *)header;
 411        struct memory_target *target = NULL;
 412
 413        if (p->header.length != sizeof(*p)) {
 414                pr_notice("HMAT: Unexpected address range header length: %u\n",
 415                         p->header.length);
 416                return -EINVAL;
 417        }
 418
 419        if (hmat_revision == 1)
 420                pr_info("HMAT: Memory (%#llx length %#llx) Flags:%04x Processor Domain:%u Memory Domain:%u\n",
 421                        p->reserved3, p->reserved4, p->flags, p->processor_PD,
 422                        p->memory_PD);
 423        else
 424                pr_info("HMAT: Memory Flags:%04x Processor Domain:%u Memory Domain:%u\n",
 425                        p->flags, p->processor_PD, p->memory_PD);
 426
 427        if (p->flags & ACPI_HMAT_MEMORY_PD_VALID && hmat_revision == 1) {
 428                target = find_mem_target(p->memory_PD);
 429                if (!target) {
 430                        pr_debug("HMAT: Memory Domain missing from SRAT\n");
 431                        return -EINVAL;
 432                }
 433        }
 434        if (target && p->flags & ACPI_HMAT_PROCESSOR_PD_VALID) {
 435                int p_node = pxm_to_node(p->processor_PD);
 436
 437                if (p_node == NUMA_NO_NODE) {
 438                        pr_debug("HMAT: Invalid Processor Domain\n");
 439                        return -EINVAL;
 440                }
 441                target->processor_pxm = p->processor_PD;
 442        }
 443
 444        return 0;
 445}
 446
 447static int __init hmat_parse_subtable(union acpi_subtable_headers *header,
 448                                      const unsigned long end)
 449{
 450        struct acpi_hmat_structure *hdr = (void *)header;
 451
 452        if (!hdr)
 453                return -EINVAL;
 454
 455        switch (hdr->type) {
 456        case ACPI_HMAT_TYPE_PROXIMITY:
 457                return hmat_parse_proximity_domain(header, end);
 458        case ACPI_HMAT_TYPE_LOCALITY:
 459                return hmat_parse_locality(header, end);
 460        case ACPI_HMAT_TYPE_CACHE:
 461                return hmat_parse_cache(header, end);
 462        default:
 463                return -EINVAL;
 464        }
 465}
 466
 467static __init int srat_parse_mem_affinity(union acpi_subtable_headers *header,
 468                                          const unsigned long end)
 469{
 470        struct acpi_srat_mem_affinity *ma = (void *)header;
 471
 472        if (!ma)
 473                return -EINVAL;
 474        if (!(ma->flags & ACPI_SRAT_MEM_ENABLED))
 475                return 0;
 476        alloc_memory_target(ma->proximity_domain, ma->base_address, ma->length);
 477        return 0;
 478}
 479
 480static u32 hmat_initiator_perf(struct memory_target *target,
 481                               struct memory_initiator *initiator,
 482                               struct acpi_hmat_locality *hmat_loc)
 483{
 484        unsigned int ipds, tpds, i, idx = 0, tdx = 0;
 485        u32 *inits, *targs;
 486        u16 *entries;
 487
 488        ipds = hmat_loc->number_of_initiator_Pds;
 489        tpds = hmat_loc->number_of_target_Pds;
 490        inits = (u32 *)(hmat_loc + 1);
 491        targs = inits + ipds;
 492        entries = (u16 *)(targs + tpds);
 493
 494        for (i = 0; i < ipds; i++) {
 495                if (inits[i] == initiator->processor_pxm) {
 496                        idx = i;
 497                        break;
 498                }
 499        }
 500
 501        if (i == ipds)
 502                return 0;
 503
 504        for (i = 0; i < tpds; i++) {
 505                if (targs[i] == target->memory_pxm) {
 506                        tdx = i;
 507                        break;
 508                }
 509        }
 510        if (i == tpds)
 511                return 0;
 512
 513        return hmat_normalize(entries[idx * tpds + tdx],
 514                              hmat_loc->entry_base_unit,
 515                              hmat_loc->data_type);
 516}
 517
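/*
 * Track the best value seen so far for a given data type: lower wins
 * for latencies, higher wins for bandwidths.  A value of 0 means "no
 * data" and never updates the best.
 */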
 518static bool hmat_update_best(u8 type, u32 value, u32 *best)
 519{
 520        bool updated = false;
 521
 522        if (!value)
 523                return false;
 524
 525        switch (type) {
 526        case ACPI_HMAT_ACCESS_LATENCY:
 527        case ACPI_HMAT_READ_LATENCY:
 528        case ACPI_HMAT_WRITE_LATENCY:
 529                if (!*best || *best > value) {
 530                        *best = value;
 531                        updated = true;
 532                }
 533                break;
 534        case ACPI_HMAT_ACCESS_BANDWIDTH:
 535        case ACPI_HMAT_READ_BANDWIDTH:
 536        case ACPI_HMAT_WRITE_BANDWIDTH:
 537                if (!*best || *best < value) {
 538                        *best = value;
 539                        updated = true;
 540                }
 541                break;
 542        }
 543
 544        return updated;
 545}
 546
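/*
 * list_sort() comparator: orders initiators by PXM and, as a side
 * effect, marks every initiator's PXM in the p_nodes bitmap passed in
 * through @priv.
 */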
 547static int initiator_cmp(void *priv, struct list_head *a, struct list_head *b)
 548{
 549        struct memory_initiator *ia;
 550        struct memory_initiator *ib;
 551        unsigned long *p_nodes = priv;
 552
 553        ia = list_entry(a, struct memory_initiator, node);
 554        ib = list_entry(b, struct memory_initiator, node);
 555
 556        set_bit(ia->processor_pxm, p_nodes);
 557        set_bit(ib->processor_pxm, p_nodes);
 558
 559        return ia->processor_pxm - ib->processor_pxm;
 560}
 561
 562static void hmat_register_target_initiators(struct memory_target *target)
 563{
 564        static DECLARE_BITMAP(p_nodes, MAX_NUMNODES);
 565        struct memory_initiator *initiator;
 566        unsigned int mem_nid, cpu_nid;
 567        struct memory_locality *loc = NULL;
 568        u32 best = 0;
 569        int i;
 570
 571        mem_nid = pxm_to_node(target->memory_pxm);
 572        /*
 573         * If the Address Range Structure provides a local processor pxm, link
 574         * only that one. Otherwise, find the best performance attributes and
 575         * register all initiators that match.
 576         */
 577        if (target->processor_pxm != PXM_INVAL) {
 578                cpu_nid = pxm_to_node(target->processor_pxm);
 579                register_memory_node_under_compute_node(mem_nid, cpu_nid, 0);
 580                return;
 581        }
 582
 583        if (list_empty(&localities))
 584                return;
 585
 586        /*
 587         * We need the initiator list sorted so we can use bitmap_clear for
 588         * previously set initiators when we find a better memory accessor.
 589         * We'll also use the sorting to prime the candidate nodes with known
 590         * initiators.
 591         */
 592        bitmap_zero(p_nodes, MAX_NUMNODES);
 593        list_sort(p_nodes, &initiators, initiator_cmp);
 594        for (i = WRITE_LATENCY; i <= READ_BANDWIDTH; i++) {
 595                loc = localities_types[i];
 596                if (!loc)
 597                        continue;
 598
 599                best = 0;
 600                list_for_each_entry(initiator, &initiators, node) {
 601                        u32 value;
 602
 603                        if (!test_bit(initiator->processor_pxm, p_nodes))
 604                                continue;
 605
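                        /*
                         * A new best invalidates every lower PXM kept
                         * so far (the list is sorted by PXM); an
                         * initiator that does not match the best has
                         * its own bit cleared below.
                         */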
 606                        value = hmat_initiator_perf(target, initiator, loc->hmat_loc);
 607                        if (hmat_update_best(loc->hmat_loc->data_type, value, &best))
 608                                bitmap_clear(p_nodes, 0, initiator->processor_pxm);
 609                        if (value != best)
 610                                clear_bit(initiator->processor_pxm, p_nodes);
 611                }
 612                if (best)
 613                        hmat_update_target_access(target, loc->hmat_loc->data_type, best);
 614        }
 615
 616        for_each_set_bit(i, p_nodes, MAX_NUMNODES) {
 617                cpu_nid = pxm_to_node(i);
 618                register_memory_node_under_compute_node(mem_nid, cpu_nid, 0);
 619        }
 620}
 621
 622static void hmat_register_target_cache(struct memory_target *target)
 623{
 624        unsigned mem_nid = pxm_to_node(target->memory_pxm);
 625        struct target_cache *tcache;
 626
 627        list_for_each_entry(tcache, &target->caches, node)
 628                node_add_cache(mem_nid, &tcache->cache_attrs);
 629}
 630
 631static void hmat_register_target_perf(struct memory_target *target)
 632{
 633        unsigned mem_nid = pxm_to_node(target->memory_pxm);
 634        node_set_perf_attrs(mem_nid, &target->hmem_attrs, 0);
 635}
 636
 637static void hmat_register_target_device(struct memory_target *target,
 638                struct resource *r)
 639{
 640        /* define a clean / non-busy resource for the platform device */
 641        struct resource res = {
 642                .start = r->start,
 643                .end = r->end,
 644                .flags = IORESOURCE_MEM,
 645        };
 646        struct platform_device *pdev;
 647        struct memregion_info info;
 648        int rc, id;
 649
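        /*
         * Only ranges that firmware marked "Soft Reserved"
         * (IORES_DESC_SOFT_RESERVED) get an hmem platform device; all
         * other memory is skipped here.
         */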
 650        rc = region_intersects(res.start, resource_size(&res), IORESOURCE_MEM,
 651                        IORES_DESC_SOFT_RESERVED);
 652        if (rc != REGION_INTERSECTS)
 653                return;
 654
 655        id = memregion_alloc(GFP_KERNEL);
 656        if (id < 0) {
 657                pr_err("memregion allocation failure for %pr\n", &res);
 658                return;
 659        }
 660
 661        pdev = platform_device_alloc("hmem", id);
 662        if (!pdev) {
 663                pr_err("hmem device allocation failure for %pr\n", &res);
 664                goto out_pdev;
 665        }
 666
 667        pdev->dev.numa_node = acpi_map_pxm_to_online_node(target->memory_pxm);
 668        info = (struct memregion_info) {
 669                .target_node = acpi_map_pxm_to_node(target->memory_pxm),
 670        };
 671        rc = platform_device_add_data(pdev, &info, sizeof(info));
 672        if (rc < 0) {
 673                pr_err("hmem memregion_info allocation failure for %pr\n", &res);
 674                goto out_pdev;
 675        }
 676
 677        rc = platform_device_add_resources(pdev, &res, 1);
 678        if (rc < 0) {
 679                pr_err("hmem resource allocation failure for %pr\n", &res);
 680                goto out_resource;
 681        }
 682
 683        rc = platform_device_add(pdev);
 684        if (rc < 0) {
 685                dev_err(&pdev->dev, "device add failed for %pr\n", &res);
 686                goto out_resource;
 687        }
 688
 689        return;
 690
 691out_resource:
 692        put_device(&pdev->dev);
 693out_pdev:
 694        memregion_free(id);
 695}
 696
 697static void hmat_register_target_devices(struct memory_target *target)
 698{
 699        struct resource *res;
 700
 701        /*
 702         * Do not bother creating devices if no driver is available to
 703         * consume them.
 704         */
 705        if (!IS_ENABLED(CONFIG_DEV_DAX_HMEM))
 706                return;
 707
 708        for (res = target->memregions.child; res; res = res->sibling)
 709                hmat_register_target_device(target, res);
 710}
 711
 712static void hmat_register_target(struct memory_target *target)
 713{
 714        int nid = pxm_to_node(target->memory_pxm);
 715
 716        /*
 717         * Devices may belong to either an offline or online
 718         * node, so unconditionally add them.
 719         */
 720        hmat_register_target_devices(target);
 721
 722        /*
 723         * Skip offline nodes. This can happen when memory
 724         * marked EFI_MEMORY_SP, "specific purpose", is applied
  725         * to all the memory in a proximity domain leading to
  726         * the node being marked offline / unplugged, or if a
  727         * memory-only "hotplug" node is offline.
 728         */
 729        if (nid == NUMA_NO_NODE || !node_online(nid))
 730                return;
 731
 732        mutex_lock(&target_lock);
 733        if (!target->registered) {
 734                hmat_register_target_initiators(target);
 735                hmat_register_target_cache(target);
 736                hmat_register_target_perf(target);
 737                target->registered = true;
 738        }
 739        mutex_unlock(&target_lock);
 740}
 741
 742static void hmat_register_targets(void)
 743{
 744        struct memory_target *target;
 745
 746        list_for_each_entry(target, &targets, node)
 747                hmat_register_target(target);
 748}
 749
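/*
 * Memory hotplug notifier: when a node comes online (MEM_ONLINE),
 * register the HMAT properties of the matching target.  This catches
 * targets whose nodes were offline when hmat_register_targets() ran.
 */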
 750static int hmat_callback(struct notifier_block *self,
 751                         unsigned long action, void *arg)
 752{
 753        struct memory_target *target;
 754        struct memory_notify *mnb = arg;
 755        int pxm, nid = mnb->status_change_nid;
 756
 757        if (nid == NUMA_NO_NODE || action != MEM_ONLINE)
 758                return NOTIFY_OK;
 759
 760        pxm = node_to_pxm(nid);
 761        target = find_mem_target(pxm);
 762        if (!target)
 763                return NOTIFY_OK;
 764
 765        hmat_register_target(target);
 766        return NOTIFY_OK;
 767}
 768
 769static struct notifier_block hmat_callback_nb = {
 770        .notifier_call = hmat_callback,
 771        .priority = 2,
 772};
 773
 774static __init void hmat_free_structures(void)
 775{
 776        struct memory_target *target, *tnext;
 777        struct memory_locality *loc, *lnext;
 778        struct memory_initiator *initiator, *inext;
 779        struct target_cache *tcache, *cnext;
 780
 781        list_for_each_entry_safe(target, tnext, &targets, node) {
 782                struct resource *res, *res_next;
 783
 784                list_for_each_entry_safe(tcache, cnext, &target->caches, node) {
 785                        list_del(&tcache->node);
 786                        kfree(tcache);
 787                }
 788
 789                list_del(&target->node);
 790                res = target->memregions.child;
 791                while (res) {
 792                        res_next = res->sibling;
 793                        __release_region(&target->memregions, res->start,
 794                                        resource_size(res));
 795                        res = res_next;
 796                }
 797                kfree(target);
 798        }
 799
 800        list_for_each_entry_safe(initiator, inext, &initiators, node) {
 801                list_del(&initiator->node);
 802                kfree(initiator);
 803        }
 804
 805        list_for_each_entry_safe(loc, lnext, &localities, node) {
 806                list_del(&loc->node);
 807                kfree(loc);
 808        }
 809}
 810
 811static __init int hmat_init(void)
 812{
 813        struct acpi_table_header *tbl;
 814        enum acpi_hmat_type i;
 815        acpi_status status;
 816
 817        if (srat_disabled())
 818                return 0;
 819
 820        status = acpi_get_table(ACPI_SIG_SRAT, 0, &tbl);
 821        if (ACPI_FAILURE(status))
 822                return 0;
 823
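        /*
         * Walk the SRAT memory affinity entries first so that every
         * memory proximity domain referenced by HMAT already has a
         * memory_target allocated.
         */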
 824        if (acpi_table_parse_entries(ACPI_SIG_SRAT,
 825                                sizeof(struct acpi_table_srat),
 826                                ACPI_SRAT_TYPE_MEMORY_AFFINITY,
 827                                srat_parse_mem_affinity, 0) < 0)
 828                goto out_put;
 829        acpi_put_table(tbl);
 830
 831        status = acpi_get_table(ACPI_SIG_HMAT, 0, &tbl);
 832        if (ACPI_FAILURE(status))
 833                goto out_put;
 834
 835        hmat_revision = tbl->revision;
 836        switch (hmat_revision) {
 837        case 1:
 838        case 2:
 839                break;
 840        default:
 841                pr_notice("Ignoring HMAT: Unknown revision:%d\n", hmat_revision);
 842                goto out_put;
 843        }
 844
 845        for (i = ACPI_HMAT_TYPE_PROXIMITY; i < ACPI_HMAT_TYPE_RESERVED; i++) {
 846                if (acpi_table_parse_entries(ACPI_SIG_HMAT,
 847                                             sizeof(struct acpi_table_hmat), i,
 848                                             hmat_parse_subtable, 0) < 0) {
  849                        pr_notice("Ignoring HMAT: Invalid table\n");
 850                        goto out_put;
 851                }
 852        }
 853        hmat_register_targets();
 854
 855        /* Keep the table and structures if the notifier may use them */
 856        if (!register_hotmemory_notifier(&hmat_callback_nb))
 857                return 0;
 858out_put:
 859        hmat_free_structures();
 860        acpi_put_table(tbl);
 861        return 0;
 862}
 863device_initcall(hmat_init);
 864