linux/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
<<
>>
Prefs
   1/*
   2 * Copyright 2014 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 */
  22
  23#include <linux/types.h>
  24#include <linux/kernel.h>
  25#include <linux/pci.h>
  26#include <linux/errno.h>
  27#include <linux/acpi.h>
  28#include <linux/hash.h>
  29#include <linux/cpufreq.h>
  30#include <linux/log2.h>
  31#include <linux/dmi.h>
  32#include <linux/atomic.h>
  33
  34#include "kfd_priv.h"
  35#include "kfd_crat.h"
  36#include "kfd_topology.h"
  37#include "kfd_device_queue_manager.h"
  38#include "kfd_iommu.h"
  39#include "kfd_svm.h"
  40#include "amdgpu_amdkfd.h"
  41#include "amdgpu_ras.h"
  42
  43/* topology_device_list - Master list of all topology devices */
  44static struct list_head topology_device_list;
  45static struct kfd_system_properties sys_props;
  46
  47static DECLARE_RWSEM(topology_lock);
  48static atomic_t topology_crat_proximity_domain;
  49
  50struct kfd_topology_device *kfd_topology_device_by_proximity_domain(
  51                                                uint32_t proximity_domain)
  52{
  53        struct kfd_topology_device *top_dev;
  54        struct kfd_topology_device *device = NULL;
  55
  56        down_read(&topology_lock);
  57
  58        list_for_each_entry(top_dev, &topology_device_list, list)
  59                if (top_dev->proximity_domain == proximity_domain) {
  60                        device = top_dev;
  61                        break;
  62                }
  63
  64        up_read(&topology_lock);
  65
  66        return device;
  67}
  68
  69struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id)
  70{
  71        struct kfd_topology_device *top_dev = NULL;
  72        struct kfd_topology_device *ret = NULL;
  73
  74        down_read(&topology_lock);
  75
  76        list_for_each_entry(top_dev, &topology_device_list, list)
  77                if (top_dev->gpu_id == gpu_id) {
  78                        ret = top_dev;
  79                        break;
  80                }
  81
  82        up_read(&topology_lock);
  83
  84        return ret;
  85}
  86
  87struct kfd_dev *kfd_device_by_id(uint32_t gpu_id)
  88{
  89        struct kfd_topology_device *top_dev;
  90
  91        top_dev = kfd_topology_device_by_id(gpu_id);
  92        if (!top_dev)
  93                return NULL;
  94
  95        return top_dev->gpu;
  96}
  97
  98struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev)
  99{
 100        struct kfd_topology_device *top_dev;
 101        struct kfd_dev *device = NULL;
 102
 103        down_read(&topology_lock);
 104
 105        list_for_each_entry(top_dev, &topology_device_list, list)
 106                if (top_dev->gpu && top_dev->gpu->pdev == pdev) {
 107                        device = top_dev->gpu;
 108                        break;
 109                }
 110
 111        up_read(&topology_lock);
 112
 113        return device;
 114}
 115
 116struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd)
 117{
 118        struct kfd_topology_device *top_dev;
 119        struct kfd_dev *device = NULL;
 120
 121        down_read(&topology_lock);
 122
 123        list_for_each_entry(top_dev, &topology_device_list, list)
 124                if (top_dev->gpu && top_dev->gpu->kgd == kgd) {
 125                        device = top_dev->gpu;
 126                        break;
 127                }
 128
 129        up_read(&topology_lock);
 130
 131        return device;
 132}
 133
 134/* Called with write topology_lock acquired */
 135static void kfd_release_topology_device(struct kfd_topology_device *dev)
 136{
 137        struct kfd_mem_properties *mem;
 138        struct kfd_cache_properties *cache;
 139        struct kfd_iolink_properties *iolink;
 140        struct kfd_perf_properties *perf;
 141
 142        list_del(&dev->list);
 143
 144        while (dev->mem_props.next != &dev->mem_props) {
 145                mem = container_of(dev->mem_props.next,
 146                                struct kfd_mem_properties, list);
 147                list_del(&mem->list);
 148                kfree(mem);
 149        }
 150
 151        while (dev->cache_props.next != &dev->cache_props) {
 152                cache = container_of(dev->cache_props.next,
 153                                struct kfd_cache_properties, list);
 154                list_del(&cache->list);
 155                kfree(cache);
 156        }
 157
 158        while (dev->io_link_props.next != &dev->io_link_props) {
 159                iolink = container_of(dev->io_link_props.next,
 160                                struct kfd_iolink_properties, list);
 161                list_del(&iolink->list);
 162                kfree(iolink);
 163        }
 164
 165        while (dev->perf_props.next != &dev->perf_props) {
 166                perf = container_of(dev->perf_props.next,
 167                                struct kfd_perf_properties, list);
 168                list_del(&perf->list);
 169                kfree(perf);
 170        }
 171
 172        kfree(dev);
 173}
 174
 175void kfd_release_topology_device_list(struct list_head *device_list)
 176{
 177        struct kfd_topology_device *dev;
 178
 179        while (!list_empty(device_list)) {
 180                dev = list_first_entry(device_list,
 181                                       struct kfd_topology_device, list);
 182                kfd_release_topology_device(dev);
 183        }
 184}
 185
 186static void kfd_release_live_view(void)
 187{
 188        kfd_release_topology_device_list(&topology_device_list);
 189        memset(&sys_props, 0, sizeof(sys_props));
 190}
 191
 192struct kfd_topology_device *kfd_create_topology_device(
 193                                struct list_head *device_list)
 194{
 195        struct kfd_topology_device *dev;
 196
 197        dev = kfd_alloc_struct(dev);
 198        if (!dev) {
 199                pr_err("No memory to allocate a topology device");
 200                return NULL;
 201        }
 202
 203        INIT_LIST_HEAD(&dev->mem_props);
 204        INIT_LIST_HEAD(&dev->cache_props);
 205        INIT_LIST_HEAD(&dev->io_link_props);
 206        INIT_LIST_HEAD(&dev->perf_props);
 207
 208        list_add_tail(&dev->list, device_list);
 209
 210        return dev;
 211}
 212
 213
 214#define sysfs_show_gen_prop(buffer, offs, fmt, ...)             \
 215                (offs += snprintf(buffer+offs, PAGE_SIZE-offs,  \
 216                                  fmt, __VA_ARGS__))
 217#define sysfs_show_32bit_prop(buffer, offs, name, value) \
 218                sysfs_show_gen_prop(buffer, offs, "%s %u\n", name, value)
 219#define sysfs_show_64bit_prop(buffer, offs, name, value) \
 220                sysfs_show_gen_prop(buffer, offs, "%s %llu\n", name, value)
 221#define sysfs_show_32bit_val(buffer, offs, value) \
 222                sysfs_show_gen_prop(buffer, offs, "%u\n", value)
 223#define sysfs_show_str_val(buffer, offs, value) \
 224                sysfs_show_gen_prop(buffer, offs, "%s\n", value)
 225
 226static ssize_t sysprops_show(struct kobject *kobj, struct attribute *attr,
 227                char *buffer)
 228{
 229        int offs = 0;
 230
 231        /* Making sure that the buffer is an empty string */
 232        buffer[0] = 0;
 233
 234        if (attr == &sys_props.attr_genid) {
 235                sysfs_show_32bit_val(buffer, offs,
 236                                     sys_props.generation_count);
 237        } else if (attr == &sys_props.attr_props) {
 238                sysfs_show_64bit_prop(buffer, offs, "platform_oem",
 239                                      sys_props.platform_oem);
 240                sysfs_show_64bit_prop(buffer, offs, "platform_id",
 241                                      sys_props.platform_id);
 242                sysfs_show_64bit_prop(buffer, offs, "platform_rev",
 243                                      sys_props.platform_rev);
 244        } else {
 245                offs = -EINVAL;
 246        }
 247
 248        return offs;
 249}
 250
 251static void kfd_topology_kobj_release(struct kobject *kobj)
 252{
 253        kfree(kobj);
 254}
 255
 256static const struct sysfs_ops sysprops_ops = {
 257        .show = sysprops_show,
 258};
 259
 260static struct kobj_type sysprops_type = {
 261        .release = kfd_topology_kobj_release,
 262        .sysfs_ops = &sysprops_ops,
 263};
 264
 265static ssize_t iolink_show(struct kobject *kobj, struct attribute *attr,
 266                char *buffer)
 267{
 268        int offs = 0;
 269        struct kfd_iolink_properties *iolink;
 270
 271        /* Making sure that the buffer is an empty string */
 272        buffer[0] = 0;
 273
 274        iolink = container_of(attr, struct kfd_iolink_properties, attr);
 275        if (iolink->gpu && kfd_devcgroup_check_permission(iolink->gpu))
 276                return -EPERM;
 277        sysfs_show_32bit_prop(buffer, offs, "type", iolink->iolink_type);
 278        sysfs_show_32bit_prop(buffer, offs, "version_major", iolink->ver_maj);
 279        sysfs_show_32bit_prop(buffer, offs, "version_minor", iolink->ver_min);
 280        sysfs_show_32bit_prop(buffer, offs, "node_from", iolink->node_from);
 281        sysfs_show_32bit_prop(buffer, offs, "node_to", iolink->node_to);
 282        sysfs_show_32bit_prop(buffer, offs, "weight", iolink->weight);
 283        sysfs_show_32bit_prop(buffer, offs, "min_latency", iolink->min_latency);
 284        sysfs_show_32bit_prop(buffer, offs, "max_latency", iolink->max_latency);
 285        sysfs_show_32bit_prop(buffer, offs, "min_bandwidth",
 286                              iolink->min_bandwidth);
 287        sysfs_show_32bit_prop(buffer, offs, "max_bandwidth",
 288                              iolink->max_bandwidth);
 289        sysfs_show_32bit_prop(buffer, offs, "recommended_transfer_size",
 290                              iolink->rec_transfer_size);
 291        sysfs_show_32bit_prop(buffer, offs, "flags", iolink->flags);
 292
 293        return offs;
 294}
 295
 296static const struct sysfs_ops iolink_ops = {
 297        .show = iolink_show,
 298};
 299
 300static struct kobj_type iolink_type = {
 301        .release = kfd_topology_kobj_release,
 302        .sysfs_ops = &iolink_ops,
 303};
 304
 305static ssize_t mem_show(struct kobject *kobj, struct attribute *attr,
 306                char *buffer)
 307{
 308        int offs = 0;
 309        struct kfd_mem_properties *mem;
 310
 311        /* Making sure that the buffer is an empty string */
 312        buffer[0] = 0;
 313
 314        mem = container_of(attr, struct kfd_mem_properties, attr);
 315        if (mem->gpu && kfd_devcgroup_check_permission(mem->gpu))
 316                return -EPERM;
 317        sysfs_show_32bit_prop(buffer, offs, "heap_type", mem->heap_type);
 318        sysfs_show_64bit_prop(buffer, offs, "size_in_bytes",
 319                              mem->size_in_bytes);
 320        sysfs_show_32bit_prop(buffer, offs, "flags", mem->flags);
 321        sysfs_show_32bit_prop(buffer, offs, "width", mem->width);
 322        sysfs_show_32bit_prop(buffer, offs, "mem_clk_max",
 323                              mem->mem_clk_max);
 324
 325        return offs;
 326}
 327
 328static const struct sysfs_ops mem_ops = {
 329        .show = mem_show,
 330};
 331
 332static struct kobj_type mem_type = {
 333        .release = kfd_topology_kobj_release,
 334        .sysfs_ops = &mem_ops,
 335};
 336
 337static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr,
 338                char *buffer)
 339{
 340        int offs = 0;
 341        uint32_t i, j;
 342        struct kfd_cache_properties *cache;
 343
 344        /* Making sure that the buffer is an empty string */
 345        buffer[0] = 0;
 346
 347        cache = container_of(attr, struct kfd_cache_properties, attr);
 348        if (cache->gpu && kfd_devcgroup_check_permission(cache->gpu))
 349                return -EPERM;
 350        sysfs_show_32bit_prop(buffer, offs, "processor_id_low",
 351                        cache->processor_id_low);
 352        sysfs_show_32bit_prop(buffer, offs, "level", cache->cache_level);
 353        sysfs_show_32bit_prop(buffer, offs, "size", cache->cache_size);
 354        sysfs_show_32bit_prop(buffer, offs, "cache_line_size",
 355                              cache->cacheline_size);
 356        sysfs_show_32bit_prop(buffer, offs, "cache_lines_per_tag",
 357                              cache->cachelines_per_tag);
 358        sysfs_show_32bit_prop(buffer, offs, "association", cache->cache_assoc);
 359        sysfs_show_32bit_prop(buffer, offs, "latency", cache->cache_latency);
 360        sysfs_show_32bit_prop(buffer, offs, "type", cache->cache_type);
 361        offs += snprintf(buffer+offs, PAGE_SIZE-offs, "sibling_map ");
 362        for (i = 0; i < CRAT_SIBLINGMAP_SIZE; i++)
 363                for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++)
 364                        /* Check each bit */
 365                        offs += snprintf(buffer+offs, PAGE_SIZE-offs, "%d,",
 366                                         (cache->sibling_map[i] >> j) & 1);
 367
 368        /* Replace the last "," with end of line */
 369        buffer[offs-1] = '\n';
 370        return offs;
 371}
 372
 373static const struct sysfs_ops cache_ops = {
 374        .show = kfd_cache_show,
 375};
 376
 377static struct kobj_type cache_type = {
 378        .release = kfd_topology_kobj_release,
 379        .sysfs_ops = &cache_ops,
 380};
 381
 382/****** Sysfs of Performance Counters ******/
 383
 384struct kfd_perf_attr {
 385        struct kobj_attribute attr;
 386        uint32_t data;
 387};
 388
 389static ssize_t perf_show(struct kobject *kobj, struct kobj_attribute *attrs,
 390                        char *buf)
 391{
 392        int offs = 0;
 393        struct kfd_perf_attr *attr;
 394
 395        buf[0] = 0;
 396        attr = container_of(attrs, struct kfd_perf_attr, attr);
 397        if (!attr->data) /* invalid data for PMC */
 398                return 0;
 399        else
 400                return sysfs_show_32bit_val(buf, offs, attr->data);
 401}
 402
 403#define KFD_PERF_DESC(_name, _data)                     \
 404{                                                       \
 405        .attr  = __ATTR(_name, 0444, perf_show, NULL),  \
 406        .data = _data,                                  \
 407}
 408
 409static struct kfd_perf_attr perf_attr_iommu[] = {
 410        KFD_PERF_DESC(max_concurrent, 0),
 411        KFD_PERF_DESC(num_counters, 0),
 412        KFD_PERF_DESC(counter_ids, 0),
 413};
 414/****************************************/
 415
 416static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
 417                char *buffer)
 418{
 419        int offs = 0;
 420        struct kfd_topology_device *dev;
 421        uint32_t log_max_watch_addr;
 422
 423        /* Making sure that the buffer is an empty string */
 424        buffer[0] = 0;
 425
 426        if (strcmp(attr->name, "gpu_id") == 0) {
 427                dev = container_of(attr, struct kfd_topology_device,
 428                                attr_gpuid);
 429                if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu))
 430                        return -EPERM;
 431                return sysfs_show_32bit_val(buffer, offs, dev->gpu_id);
 432        }
 433
 434        if (strcmp(attr->name, "name") == 0) {
 435                dev = container_of(attr, struct kfd_topology_device,
 436                                attr_name);
 437
 438                if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu))
 439                        return -EPERM;
 440                return sysfs_show_str_val(buffer, offs, dev->node_props.name);
 441        }
 442
 443        dev = container_of(attr, struct kfd_topology_device,
 444                        attr_props);
 445        if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu))
 446                return -EPERM;
 447        sysfs_show_32bit_prop(buffer, offs, "cpu_cores_count",
 448                              dev->node_props.cpu_cores_count);
 449        sysfs_show_32bit_prop(buffer, offs, "simd_count",
 450                              dev->gpu ? dev->node_props.simd_count : 0);
 451        sysfs_show_32bit_prop(buffer, offs, "mem_banks_count",
 452                              dev->node_props.mem_banks_count);
 453        sysfs_show_32bit_prop(buffer, offs, "caches_count",
 454                              dev->node_props.caches_count);
 455        sysfs_show_32bit_prop(buffer, offs, "io_links_count",
 456                              dev->node_props.io_links_count);
 457        sysfs_show_32bit_prop(buffer, offs, "cpu_core_id_base",
 458                              dev->node_props.cpu_core_id_base);
 459        sysfs_show_32bit_prop(buffer, offs, "simd_id_base",
 460                              dev->node_props.simd_id_base);
 461        sysfs_show_32bit_prop(buffer, offs, "max_waves_per_simd",
 462                              dev->node_props.max_waves_per_simd);
 463        sysfs_show_32bit_prop(buffer, offs, "lds_size_in_kb",
 464                              dev->node_props.lds_size_in_kb);
 465        sysfs_show_32bit_prop(buffer, offs, "gds_size_in_kb",
 466                              dev->node_props.gds_size_in_kb);
 467        sysfs_show_32bit_prop(buffer, offs, "num_gws",
 468                              dev->node_props.num_gws);
 469        sysfs_show_32bit_prop(buffer, offs, "wave_front_size",
 470                              dev->node_props.wave_front_size);
 471        sysfs_show_32bit_prop(buffer, offs, "array_count",
 472                              dev->node_props.array_count);
 473        sysfs_show_32bit_prop(buffer, offs, "simd_arrays_per_engine",
 474                              dev->node_props.simd_arrays_per_engine);
 475        sysfs_show_32bit_prop(buffer, offs, "cu_per_simd_array",
 476                              dev->node_props.cu_per_simd_array);
 477        sysfs_show_32bit_prop(buffer, offs, "simd_per_cu",
 478                              dev->node_props.simd_per_cu);
 479        sysfs_show_32bit_prop(buffer, offs, "max_slots_scratch_cu",
 480                              dev->node_props.max_slots_scratch_cu);
 481        sysfs_show_32bit_prop(buffer, offs, "vendor_id",
 482                              dev->node_props.vendor_id);
 483        sysfs_show_32bit_prop(buffer, offs, "device_id",
 484                              dev->node_props.device_id);
 485        sysfs_show_32bit_prop(buffer, offs, "location_id",
 486                              dev->node_props.location_id);
 487        sysfs_show_32bit_prop(buffer, offs, "domain",
 488                              dev->node_props.domain);
 489        sysfs_show_32bit_prop(buffer, offs, "drm_render_minor",
 490                              dev->node_props.drm_render_minor);
 491        sysfs_show_64bit_prop(buffer, offs, "hive_id",
 492                              dev->node_props.hive_id);
 493        sysfs_show_32bit_prop(buffer, offs, "num_sdma_engines",
 494                              dev->node_props.num_sdma_engines);
 495        sysfs_show_32bit_prop(buffer, offs, "num_sdma_xgmi_engines",
 496                              dev->node_props.num_sdma_xgmi_engines);
 497        sysfs_show_32bit_prop(buffer, offs, "num_sdma_queues_per_engine",
 498                              dev->node_props.num_sdma_queues_per_engine);
 499        sysfs_show_32bit_prop(buffer, offs, "num_cp_queues",
 500                              dev->node_props.num_cp_queues);
 501
 502        if (dev->gpu) {
 503                log_max_watch_addr =
 504                        __ilog2_u32(dev->gpu->device_info->num_of_watch_points);
 505
 506                if (log_max_watch_addr) {
 507                        dev->node_props.capability |=
 508                                        HSA_CAP_WATCH_POINTS_SUPPORTED;
 509
 510                        dev->node_props.capability |=
 511                                ((log_max_watch_addr <<
 512                                        HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT) &
 513                                HSA_CAP_WATCH_POINTS_TOTALBITS_MASK);
 514                }
 515
 516                if (dev->gpu->device_info->asic_family == CHIP_TONGA)
 517                        dev->node_props.capability |=
 518                                        HSA_CAP_AQL_QUEUE_DOUBLE_MAP;
 519
 520                sysfs_show_32bit_prop(buffer, offs, "max_engine_clk_fcompute",
 521                        dev->node_props.max_engine_clk_fcompute);
 522
 523                sysfs_show_64bit_prop(buffer, offs, "local_mem_size", 0ULL);
 524
 525                sysfs_show_32bit_prop(buffer, offs, "fw_version",
 526                                      dev->gpu->mec_fw_version);
 527                sysfs_show_32bit_prop(buffer, offs, "capability",
 528                                      dev->node_props.capability);
 529                sysfs_show_32bit_prop(buffer, offs, "sdma_fw_version",
 530                                      dev->gpu->sdma_fw_version);
 531                sysfs_show_64bit_prop(buffer, offs, "unique_id",
 532                                      amdgpu_amdkfd_get_unique_id(dev->gpu->kgd));
 533
 534        }
 535
 536        return sysfs_show_32bit_prop(buffer, offs, "max_engine_clk_ccompute",
 537                                     cpufreq_quick_get_max(0)/1000);
 538}
 539
 540static const struct sysfs_ops node_ops = {
 541        .show = node_show,
 542};
 543
 544static struct kobj_type node_type = {
 545        .release = kfd_topology_kobj_release,
 546        .sysfs_ops = &node_ops,
 547};
 548
 549static void kfd_remove_sysfs_file(struct kobject *kobj, struct attribute *attr)
 550{
 551        sysfs_remove_file(kobj, attr);
 552        kobject_del(kobj);
 553        kobject_put(kobj);
 554}
 555
 556static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
 557{
 558        struct kfd_iolink_properties *iolink;
 559        struct kfd_cache_properties *cache;
 560        struct kfd_mem_properties *mem;
 561        struct kfd_perf_properties *perf;
 562
 563        if (dev->kobj_iolink) {
 564                list_for_each_entry(iolink, &dev->io_link_props, list)
 565                        if (iolink->kobj) {
 566                                kfd_remove_sysfs_file(iolink->kobj,
 567                                                        &iolink->attr);
 568                                iolink->kobj = NULL;
 569                        }
 570                kobject_del(dev->kobj_iolink);
 571                kobject_put(dev->kobj_iolink);
 572                dev->kobj_iolink = NULL;
 573        }
 574
 575        if (dev->kobj_cache) {
 576                list_for_each_entry(cache, &dev->cache_props, list)
 577                        if (cache->kobj) {
 578                                kfd_remove_sysfs_file(cache->kobj,
 579                                                        &cache->attr);
 580                                cache->kobj = NULL;
 581                        }
 582                kobject_del(dev->kobj_cache);
 583                kobject_put(dev->kobj_cache);
 584                dev->kobj_cache = NULL;
 585        }
 586
 587        if (dev->kobj_mem) {
 588                list_for_each_entry(mem, &dev->mem_props, list)
 589                        if (mem->kobj) {
 590                                kfd_remove_sysfs_file(mem->kobj, &mem->attr);
 591                                mem->kobj = NULL;
 592                        }
 593                kobject_del(dev->kobj_mem);
 594                kobject_put(dev->kobj_mem);
 595                dev->kobj_mem = NULL;
 596        }
 597
 598        if (dev->kobj_perf) {
 599                list_for_each_entry(perf, &dev->perf_props, list) {
 600                        kfree(perf->attr_group);
 601                        perf->attr_group = NULL;
 602                }
 603                kobject_del(dev->kobj_perf);
 604                kobject_put(dev->kobj_perf);
 605                dev->kobj_perf = NULL;
 606        }
 607
 608        if (dev->kobj_node) {
 609                sysfs_remove_file(dev->kobj_node, &dev->attr_gpuid);
 610                sysfs_remove_file(dev->kobj_node, &dev->attr_name);
 611                sysfs_remove_file(dev->kobj_node, &dev->attr_props);
 612                kobject_del(dev->kobj_node);
 613                kobject_put(dev->kobj_node);
 614                dev->kobj_node = NULL;
 615        }
 616}
 617
 618static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
 619                uint32_t id)
 620{
 621        struct kfd_iolink_properties *iolink;
 622        struct kfd_cache_properties *cache;
 623        struct kfd_mem_properties *mem;
 624        struct kfd_perf_properties *perf;
 625        int ret;
 626        uint32_t i, num_attrs;
 627        struct attribute **attrs;
 628
 629        if (WARN_ON(dev->kobj_node))
 630                return -EEXIST;
 631
 632        /*
 633         * Creating the sysfs folders
 634         */
 635        dev->kobj_node = kfd_alloc_struct(dev->kobj_node);
 636        if (!dev->kobj_node)
 637                return -ENOMEM;
 638
 639        ret = kobject_init_and_add(dev->kobj_node, &node_type,
 640                        sys_props.kobj_nodes, "%d", id);
 641        if (ret < 0) {
 642                kobject_put(dev->kobj_node);
 643                return ret;
 644        }
 645
 646        dev->kobj_mem = kobject_create_and_add("mem_banks", dev->kobj_node);
 647        if (!dev->kobj_mem)
 648                return -ENOMEM;
 649
 650        dev->kobj_cache = kobject_create_and_add("caches", dev->kobj_node);
 651        if (!dev->kobj_cache)
 652                return -ENOMEM;
 653
 654        dev->kobj_iolink = kobject_create_and_add("io_links", dev->kobj_node);
 655        if (!dev->kobj_iolink)
 656                return -ENOMEM;
 657
 658        dev->kobj_perf = kobject_create_and_add("perf", dev->kobj_node);
 659        if (!dev->kobj_perf)
 660                return -ENOMEM;
 661
 662        /*
 663         * Creating sysfs files for node properties
 664         */
 665        dev->attr_gpuid.name = "gpu_id";
 666        dev->attr_gpuid.mode = KFD_SYSFS_FILE_MODE;
 667        sysfs_attr_init(&dev->attr_gpuid);
 668        dev->attr_name.name = "name";
 669        dev->attr_name.mode = KFD_SYSFS_FILE_MODE;
 670        sysfs_attr_init(&dev->attr_name);
 671        dev->attr_props.name = "properties";
 672        dev->attr_props.mode = KFD_SYSFS_FILE_MODE;
 673        sysfs_attr_init(&dev->attr_props);
 674        ret = sysfs_create_file(dev->kobj_node, &dev->attr_gpuid);
 675        if (ret < 0)
 676                return ret;
 677        ret = sysfs_create_file(dev->kobj_node, &dev->attr_name);
 678        if (ret < 0)
 679                return ret;
 680        ret = sysfs_create_file(dev->kobj_node, &dev->attr_props);
 681        if (ret < 0)
 682                return ret;
 683
 684        i = 0;
 685        list_for_each_entry(mem, &dev->mem_props, list) {
 686                mem->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
 687                if (!mem->kobj)
 688                        return -ENOMEM;
 689                ret = kobject_init_and_add(mem->kobj, &mem_type,
 690                                dev->kobj_mem, "%d", i);
 691                if (ret < 0) {
 692                        kobject_put(mem->kobj);
 693                        return ret;
 694                }
 695
 696                mem->attr.name = "properties";
 697                mem->attr.mode = KFD_SYSFS_FILE_MODE;
 698                sysfs_attr_init(&mem->attr);
 699                ret = sysfs_create_file(mem->kobj, &mem->attr);
 700                if (ret < 0)
 701                        return ret;
 702                i++;
 703        }
 704
 705        i = 0;
 706        list_for_each_entry(cache, &dev->cache_props, list) {
 707                cache->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
 708                if (!cache->kobj)
 709                        return -ENOMEM;
 710                ret = kobject_init_and_add(cache->kobj, &cache_type,
 711                                dev->kobj_cache, "%d", i);
 712                if (ret < 0) {
 713                        kobject_put(cache->kobj);
 714                        return ret;
 715                }
 716
 717                cache->attr.name = "properties";
 718                cache->attr.mode = KFD_SYSFS_FILE_MODE;
 719                sysfs_attr_init(&cache->attr);
 720                ret = sysfs_create_file(cache->kobj, &cache->attr);
 721                if (ret < 0)
 722                        return ret;
 723                i++;
 724        }
 725
 726        i = 0;
 727        list_for_each_entry(iolink, &dev->io_link_props, list) {
 728                iolink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
 729                if (!iolink->kobj)
 730                        return -ENOMEM;
 731                ret = kobject_init_and_add(iolink->kobj, &iolink_type,
 732                                dev->kobj_iolink, "%d", i);
 733                if (ret < 0) {
 734                        kobject_put(iolink->kobj);
 735                        return ret;
 736                }
 737
 738                iolink->attr.name = "properties";
 739                iolink->attr.mode = KFD_SYSFS_FILE_MODE;
 740                sysfs_attr_init(&iolink->attr);
 741                ret = sysfs_create_file(iolink->kobj, &iolink->attr);
 742                if (ret < 0)
 743                        return ret;
 744                i++;
 745        }
 746
 747        /* All hardware blocks have the same number of attributes. */
 748        num_attrs = ARRAY_SIZE(perf_attr_iommu);
 749        list_for_each_entry(perf, &dev->perf_props, list) {
 750                perf->attr_group = kzalloc(sizeof(struct kfd_perf_attr)
 751                        * num_attrs + sizeof(struct attribute_group),
 752                        GFP_KERNEL);
 753                if (!perf->attr_group)
 754                        return -ENOMEM;
 755
 756                attrs = (struct attribute **)(perf->attr_group + 1);
 757                if (!strcmp(perf->block_name, "iommu")) {
 758                /* Information of IOMMU's num_counters and counter_ids is shown
 759                 * under /sys/bus/event_source/devices/amd_iommu. We don't
 760                 * duplicate here.
 761                 */
 762                        perf_attr_iommu[0].data = perf->max_concurrent;
 763                        for (i = 0; i < num_attrs; i++)
 764                                attrs[i] = &perf_attr_iommu[i].attr.attr;
 765                }
 766                perf->attr_group->name = perf->block_name;
 767                perf->attr_group->attrs = attrs;
 768                ret = sysfs_create_group(dev->kobj_perf, perf->attr_group);
 769                if (ret < 0)
 770                        return ret;
 771        }
 772
 773        return 0;
 774}
 775
 776/* Called with write topology lock acquired */
 777static int kfd_build_sysfs_node_tree(void)
 778{
 779        struct kfd_topology_device *dev;
 780        int ret;
 781        uint32_t i = 0;
 782
 783        list_for_each_entry(dev, &topology_device_list, list) {
 784                ret = kfd_build_sysfs_node_entry(dev, i);
 785                if (ret < 0)
 786                        return ret;
 787                i++;
 788        }
 789
 790        return 0;
 791}
 792
 793/* Called with write topology lock acquired */
 794static void kfd_remove_sysfs_node_tree(void)
 795{
 796        struct kfd_topology_device *dev;
 797
 798        list_for_each_entry(dev, &topology_device_list, list)
 799                kfd_remove_sysfs_node_entry(dev);
 800}
 801
 802static int kfd_topology_update_sysfs(void)
 803{
 804        int ret;
 805
 806        if (!sys_props.kobj_topology) {
 807                sys_props.kobj_topology =
 808                                kfd_alloc_struct(sys_props.kobj_topology);
 809                if (!sys_props.kobj_topology)
 810                        return -ENOMEM;
 811
 812                ret = kobject_init_and_add(sys_props.kobj_topology,
 813                                &sysprops_type,  &kfd_device->kobj,
 814                                "topology");
 815                if (ret < 0) {
 816                        kobject_put(sys_props.kobj_topology);
 817                        return ret;
 818                }
 819
 820                sys_props.kobj_nodes = kobject_create_and_add("nodes",
 821                                sys_props.kobj_topology);
 822                if (!sys_props.kobj_nodes)
 823                        return -ENOMEM;
 824
 825                sys_props.attr_genid.name = "generation_id";
 826                sys_props.attr_genid.mode = KFD_SYSFS_FILE_MODE;
 827                sysfs_attr_init(&sys_props.attr_genid);
 828                ret = sysfs_create_file(sys_props.kobj_topology,
 829                                &sys_props.attr_genid);
 830                if (ret < 0)
 831                        return ret;
 832
 833                sys_props.attr_props.name = "system_properties";
 834                sys_props.attr_props.mode = KFD_SYSFS_FILE_MODE;
 835                sysfs_attr_init(&sys_props.attr_props);
 836                ret = sysfs_create_file(sys_props.kobj_topology,
 837                                &sys_props.attr_props);
 838                if (ret < 0)
 839                        return ret;
 840        }
 841
 842        kfd_remove_sysfs_node_tree();
 843
 844        return kfd_build_sysfs_node_tree();
 845}
 846
 847static void kfd_topology_release_sysfs(void)
 848{
 849        kfd_remove_sysfs_node_tree();
 850        if (sys_props.kobj_topology) {
 851                sysfs_remove_file(sys_props.kobj_topology,
 852                                &sys_props.attr_genid);
 853                sysfs_remove_file(sys_props.kobj_topology,
 854                                &sys_props.attr_props);
 855                if (sys_props.kobj_nodes) {
 856                        kobject_del(sys_props.kobj_nodes);
 857                        kobject_put(sys_props.kobj_nodes);
 858                        sys_props.kobj_nodes = NULL;
 859                }
 860                kobject_del(sys_props.kobj_topology);
 861                kobject_put(sys_props.kobj_topology);
 862                sys_props.kobj_topology = NULL;
 863        }
 864}
 865
 866/* Called with write topology_lock acquired */
 867static void kfd_topology_update_device_list(struct list_head *temp_list,
 868                                        struct list_head *master_list)
 869{
 870        while (!list_empty(temp_list)) {
 871                list_move_tail(temp_list->next, master_list);
 872                sys_props.num_devices++;
 873        }
 874}
 875
 876static void kfd_debug_print_topology(void)
 877{
 878        struct kfd_topology_device *dev;
 879
 880        down_read(&topology_lock);
 881
 882        dev = list_last_entry(&topology_device_list,
 883                        struct kfd_topology_device, list);
 884        if (dev) {
 885                if (dev->node_props.cpu_cores_count &&
 886                                dev->node_props.simd_count) {
 887                        pr_info("Topology: Add APU node [0x%0x:0x%0x]\n",
 888                                dev->node_props.device_id,
 889                                dev->node_props.vendor_id);
 890                } else if (dev->node_props.cpu_cores_count)
 891                        pr_info("Topology: Add CPU node\n");
 892                else if (dev->node_props.simd_count)
 893                        pr_info("Topology: Add dGPU node [0x%0x:0x%0x]\n",
 894                                dev->node_props.device_id,
 895                                dev->node_props.vendor_id);
 896        }
 897        up_read(&topology_lock);
 898}
 899
 900/* Helper function for intializing platform_xx members of
 901 * kfd_system_properties. Uses OEM info from the last CPU/APU node.
 902 */
 903static void kfd_update_system_properties(void)
 904{
 905        struct kfd_topology_device *dev;
 906
 907        down_read(&topology_lock);
 908        dev = list_last_entry(&topology_device_list,
 909                        struct kfd_topology_device, list);
 910        if (dev) {
 911                sys_props.platform_id =
 912                        (*((uint64_t *)dev->oem_id)) & CRAT_OEMID_64BIT_MASK;
 913                sys_props.platform_oem = *((uint64_t *)dev->oem_table_id);
 914                sys_props.platform_rev = dev->oem_revision;
 915        }
 916        up_read(&topology_lock);
 917}
 918
 919static void find_system_memory(const struct dmi_header *dm,
 920        void *private)
 921{
 922        struct kfd_mem_properties *mem;
 923        u16 mem_width, mem_clock;
 924        struct kfd_topology_device *kdev =
 925                (struct kfd_topology_device *)private;
 926        const u8 *dmi_data = (const u8 *)(dm + 1);
 927
 928        if (dm->type == DMI_ENTRY_MEM_DEVICE && dm->length >= 0x15) {
 929                mem_width = (u16)(*(const u16 *)(dmi_data + 0x6));
 930                mem_clock = (u16)(*(const u16 *)(dmi_data + 0x11));
 931                list_for_each_entry(mem, &kdev->mem_props, list) {
 932                        if (mem_width != 0xFFFF && mem_width != 0)
 933                                mem->width = mem_width;
 934                        if (mem_clock != 0)
 935                                mem->mem_clk_max = mem_clock;
 936                }
 937        }
 938}
 939
 940/*
 941 * Performance counters information is not part of CRAT but we would like to
 942 * put them in the sysfs under topology directory for Thunk to get the data.
 943 * This function is called before updating the sysfs.
 944 */
 945static int kfd_add_perf_to_topology(struct kfd_topology_device *kdev)
 946{
 947        /* These are the only counters supported so far */
 948        return kfd_iommu_add_perf_counters(kdev);
 949}
 950
 951/* kfd_add_non_crat_information - Add information that is not currently
 952 *      defined in CRAT but is necessary for KFD topology
 953 * @dev - topology device to which addition info is added
 954 */
 955static void kfd_add_non_crat_information(struct kfd_topology_device *kdev)
 956{
 957        /* Check if CPU only node. */
 958        if (!kdev->gpu) {
 959                /* Add system memory information */
 960                dmi_walk(find_system_memory, kdev);
 961        }
 962        /* TODO: For GPU node, rearrange code from kfd_topology_add_device */
 963}
 964
 965/* kfd_is_acpi_crat_invalid - CRAT from ACPI is valid only for AMD APU devices.
 966 *      Ignore CRAT for all other devices. AMD APU is identified if both CPU
 967 *      and GPU cores are present.
 968 * @device_list - topology device list created by parsing ACPI CRAT table.
 969 * @return - TRUE if invalid, FALSE is valid.
 970 */
 971static bool kfd_is_acpi_crat_invalid(struct list_head *device_list)
 972{
 973        struct kfd_topology_device *dev;
 974
 975        list_for_each_entry(dev, device_list, list) {
 976                if (dev->node_props.cpu_cores_count &&
 977                        dev->node_props.simd_count)
 978                        return false;
 979        }
 980        pr_info("Ignoring ACPI CRAT on non-APU system\n");
 981        return true;
 982}
 983
 984int kfd_topology_init(void)
 985{
 986        void *crat_image = NULL;
 987        size_t image_size = 0;
 988        int ret;
 989        struct list_head temp_topology_device_list;
 990        int cpu_only_node = 0;
 991        struct kfd_topology_device *kdev;
 992        int proximity_domain;
 993
 994        /* topology_device_list - Master list of all topology devices
 995         * temp_topology_device_list - temporary list created while parsing CRAT
 996         * or VCRAT. Once parsing is complete the contents of list is moved to
 997         * topology_device_list
 998         */
 999
1000        /* Initialize the head for the both the lists */
1001        INIT_LIST_HEAD(&topology_device_list);
1002        INIT_LIST_HEAD(&temp_topology_device_list);
1003        init_rwsem(&topology_lock);
1004
1005        memset(&sys_props, 0, sizeof(sys_props));
1006
1007        /* Proximity domains in ACPI CRAT tables start counting at
1008         * 0. The same should be true for virtual CRAT tables created
1009         * at this stage. GPUs added later in kfd_topology_add_device
1010         * use a counter.
1011         */
1012        proximity_domain = 0;
1013
1014        /*
1015         * Get the CRAT image from the ACPI. If ACPI doesn't have one
1016         * or if ACPI CRAT is invalid create a virtual CRAT.
1017         * NOTE: The current implementation expects all AMD APUs to have
1018         *      CRAT. If no CRAT is available, it is assumed to be a CPU
1019         */
1020        ret = kfd_create_crat_image_acpi(&crat_image, &image_size);
1021        if (!ret) {
1022                ret = kfd_parse_crat_table(crat_image,
1023                                           &temp_topology_device_list,
1024                                           proximity_domain);
1025                if (ret ||
1026                    kfd_is_acpi_crat_invalid(&temp_topology_device_list)) {
1027                        kfd_release_topology_device_list(
1028                                &temp_topology_device_list);
1029                        kfd_destroy_crat_image(crat_image);
1030                        crat_image = NULL;
1031                }
1032        }
1033
1034        if (!crat_image) {
1035                ret = kfd_create_crat_image_virtual(&crat_image, &image_size,
1036                                                    COMPUTE_UNIT_CPU, NULL,
1037                                                    proximity_domain);
1038                cpu_only_node = 1;
1039                if (ret) {
1040                        pr_err("Error creating VCRAT table for CPU\n");
1041                        return ret;
1042                }
1043
1044                ret = kfd_parse_crat_table(crat_image,
1045                                           &temp_topology_device_list,
1046                                           proximity_domain);
1047                if (ret) {
1048                        pr_err("Error parsing VCRAT table for CPU\n");
1049                        goto err;
1050                }
1051        }
1052
1053        kdev = list_first_entry(&temp_topology_device_list,
1054                                struct kfd_topology_device, list);
1055        kfd_add_perf_to_topology(kdev);
1056
1057        down_write(&topology_lock);
1058        kfd_topology_update_device_list(&temp_topology_device_list,
1059                                        &topology_device_list);
1060        atomic_set(&topology_crat_proximity_domain, sys_props.num_devices-1);
1061        ret = kfd_topology_update_sysfs();
1062        up_write(&topology_lock);
1063
1064        if (!ret) {
1065                sys_props.generation_count++;
1066                kfd_update_system_properties();
1067                kfd_debug_print_topology();
1068        } else
1069                pr_err("Failed to update topology in sysfs ret=%d\n", ret);
1070
1071        /* For nodes with GPU, this information gets added
1072         * when GPU is detected (kfd_topology_add_device).
1073         */
1074        if (cpu_only_node) {
1075                /* Add additional information to CPU only node created above */
1076                down_write(&topology_lock);
1077                kdev = list_first_entry(&topology_device_list,
1078                                struct kfd_topology_device, list);
1079                up_write(&topology_lock);
1080                kfd_add_non_crat_information(kdev);
1081        }
1082
1083err:
1084        kfd_destroy_crat_image(crat_image);
1085        return ret;
1086}
1087
1088void kfd_topology_shutdown(void)
1089{
1090        down_write(&topology_lock);
1091        kfd_topology_release_sysfs();
1092        kfd_release_live_view();
1093        up_write(&topology_lock);
1094}
1095
1096static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu)
1097{
1098        uint32_t hashout;
1099        uint32_t buf[7];
1100        uint64_t local_mem_size;
1101        int i;
1102        struct kfd_local_mem_info local_mem_info;
1103
1104        if (!gpu)
1105                return 0;
1106
1107        amdgpu_amdkfd_get_local_mem_info(gpu->kgd, &local_mem_info);
1108
1109        local_mem_size = local_mem_info.local_mem_size_private +
1110                        local_mem_info.local_mem_size_public;
1111
1112        buf[0] = gpu->pdev->devfn;
1113        buf[1] = gpu->pdev->subsystem_vendor |
1114                (gpu->pdev->subsystem_device << 16);
1115        buf[2] = pci_domain_nr(gpu->pdev->bus);
1116        buf[3] = gpu->pdev->device;
1117        buf[4] = gpu->pdev->bus->number;
1118        buf[5] = lower_32_bits(local_mem_size);
1119        buf[6] = upper_32_bits(local_mem_size);
1120
1121        for (i = 0, hashout = 0; i < 7; i++)
1122                hashout ^= hash_32(buf[i], KFD_GPU_ID_HASH_WIDTH);
1123
1124        return hashout;
1125}
1126/* kfd_assign_gpu - Attach @gpu to the correct kfd topology device. If
1127 *              the GPU device is not already present in the topology device
1128 *              list then return NULL. This means a new topology device has to
1129 *              be created for this GPU.
1130 */
1131static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
1132{
1133        struct kfd_topology_device *dev;
1134        struct kfd_topology_device *out_dev = NULL;
1135        struct kfd_mem_properties *mem;
1136        struct kfd_cache_properties *cache;
1137        struct kfd_iolink_properties *iolink;
1138
1139        down_write(&topology_lock);
1140        list_for_each_entry(dev, &topology_device_list, list) {
1141                /* Discrete GPUs need their own topology device list
1142                 * entries. Don't assign them to CPU/APU nodes.
1143                 */
1144                if (!gpu->use_iommu_v2 &&
1145                    dev->node_props.cpu_cores_count)
1146                        continue;
1147
1148                if (!dev->gpu && (dev->node_props.simd_count > 0)) {
1149                        dev->gpu = gpu;
1150                        out_dev = dev;
1151
1152                        list_for_each_entry(mem, &dev->mem_props, list)
1153                                mem->gpu = dev->gpu;
1154                        list_for_each_entry(cache, &dev->cache_props, list)
1155                                cache->gpu = dev->gpu;
1156                        list_for_each_entry(iolink, &dev->io_link_props, list)
1157                                iolink->gpu = dev->gpu;
1158                        break;
1159                }
1160        }
1161        up_write(&topology_lock);
1162        return out_dev;
1163}
1164
1165static void kfd_notify_gpu_change(uint32_t gpu_id, int arrival)
1166{
1167        /*
1168         * TODO: Generate an event for thunk about the arrival/removal
1169         * of the GPU
1170         */
1171}
1172
1173/* kfd_fill_mem_clk_max_info - Since CRAT doesn't have memory clock info,
1174 *              patch this after CRAT parsing.
1175 */
1176static void kfd_fill_mem_clk_max_info(struct kfd_topology_device *dev)
1177{
1178        struct kfd_mem_properties *mem;
1179        struct kfd_local_mem_info local_mem_info;
1180
1181        if (!dev)
1182                return;
1183
1184        /* Currently, amdgpu driver (amdgpu_mc) deals only with GPUs with
1185         * single bank of VRAM local memory.
1186         * for dGPUs - VCRAT reports only one bank of Local Memory
1187         * for APUs - If CRAT from ACPI reports more than one bank, then
1188         *      all the banks will report the same mem_clk_max information
1189         */
1190        amdgpu_amdkfd_get_local_mem_info(dev->gpu->kgd, &local_mem_info);
1191
1192        list_for_each_entry(mem, &dev->mem_props, list)
1193                mem->mem_clk_max = local_mem_info.mem_clk_max;
1194}
1195
1196static void kfd_set_iolink_no_atomics(struct kfd_topology_device *dev,
1197                                        struct kfd_topology_device *target_gpu_dev,
1198                                        struct kfd_iolink_properties *link)
1199{
1200        /* xgmi always supports atomics between links. */
1201        if (link->iolink_type == CRAT_IOLINK_TYPE_XGMI)
1202                return;
1203
1204        /* check pcie support to set cpu(dev) flags for target_gpu_dev link. */
1205        if (target_gpu_dev) {
1206                uint32_t cap;
1207
1208                pcie_capability_read_dword(target_gpu_dev->gpu->pdev,
1209                                PCI_EXP_DEVCAP2, &cap);
1210
1211                if (!(cap & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
1212                             PCI_EXP_DEVCAP2_ATOMIC_COMP64)))
1213                        link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
1214                                CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
1215        /* set gpu (dev) flags. */
1216        } else {
1217                if (!dev->gpu->pci_atomic_requested ||
1218                                dev->gpu->device_info->asic_family ==
1219                                                        CHIP_HAWAII)
1220                        link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
1221                                CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
1222        }
1223}
1224
1225static void kfd_set_iolink_non_coherent(struct kfd_topology_device *to_dev,
1226                struct kfd_iolink_properties *outbound_link,
1227                struct kfd_iolink_properties *inbound_link)
1228{
1229        /* CPU -> GPU with PCIe */
1230        if (!to_dev->gpu &&
1231            inbound_link->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS)
1232                inbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT;
1233
1234        if (to_dev->gpu) {
1235                /* GPU <-> GPU with PCIe and
1236                 * Vega20 with XGMI
1237                 */
1238                if (inbound_link->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS ||
1239                    (inbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI &&
1240                    to_dev->gpu->device_info->asic_family == CHIP_VEGA20)) {
1241                        outbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT;
1242                        inbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT;
1243                }
1244        }
1245}
1246
1247static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev)
1248{
1249        struct kfd_iolink_properties *link, *inbound_link;
1250        struct kfd_topology_device *peer_dev;
1251
1252        if (!dev || !dev->gpu)
1253                return;
1254
1255        /* GPU only creates direct links so apply flags setting to all */
1256        list_for_each_entry(link, &dev->io_link_props, list) {
1257                link->flags = CRAT_IOLINK_FLAGS_ENABLED;
1258                kfd_set_iolink_no_atomics(dev, NULL, link);
1259                peer_dev = kfd_topology_device_by_proximity_domain(
1260                                link->node_to);
1261
1262                if (!peer_dev)
1263                        continue;
1264
1265                list_for_each_entry(inbound_link, &peer_dev->io_link_props,
1266                                                                        list) {
1267                        if (inbound_link->node_to != link->node_from)
1268                                continue;
1269
1270                        inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED;
1271                        kfd_set_iolink_no_atomics(peer_dev, dev, inbound_link);
1272                        kfd_set_iolink_non_coherent(peer_dev, link, inbound_link);
1273                }
1274        }
1275}
1276
1277int kfd_topology_add_device(struct kfd_dev *gpu)
1278{
1279        uint32_t gpu_id;
1280        struct kfd_topology_device *dev;
1281        struct kfd_cu_info cu_info;
1282        int res = 0;
1283        struct list_head temp_topology_device_list;
1284        void *crat_image = NULL;
1285        size_t image_size = 0;
1286        int proximity_domain;
1287        struct amdgpu_device *adev;
1288
1289        INIT_LIST_HEAD(&temp_topology_device_list);
1290
1291        gpu_id = kfd_generate_gpu_id(gpu);
1292
1293        pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id);
1294
1295        proximity_domain = atomic_inc_return(&topology_crat_proximity_domain);
1296
1297        /* Check to see if this gpu device exists in the topology_device_list.
1298         * If so, assign the gpu to that device,
1299         * else create a Virtual CRAT for this gpu device and then parse that
1300         * CRAT to create a new topology device. Once created assign the gpu to
1301         * that topology device
1302         */
1303        dev = kfd_assign_gpu(gpu);
1304        if (!dev) {
1305                res = kfd_create_crat_image_virtual(&crat_image, &image_size,
1306                                                    COMPUTE_UNIT_GPU, gpu,
1307                                                    proximity_domain);
1308                if (res) {
1309                        pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n",
1310                               gpu_id);
1311                        return res;
1312                }
1313                res = kfd_parse_crat_table(crat_image,
1314                                           &temp_topology_device_list,
1315                                           proximity_domain);
1316                if (res) {
1317                        pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n",
1318                               gpu_id);
1319                        goto err;
1320                }
1321
1322                down_write(&topology_lock);
1323                kfd_topology_update_device_list(&temp_topology_device_list,
1324                        &topology_device_list);
1325
1326                /* Update the SYSFS tree, since we added another topology
1327                 * device
1328                 */
1329                res = kfd_topology_update_sysfs();
1330                up_write(&topology_lock);
1331
1332                if (!res)
1333                        sys_props.generation_count++;
1334                else
1335                        pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n",
1336                                                gpu_id, res);
1337                dev = kfd_assign_gpu(gpu);
1338                if (WARN_ON(!dev)) {
1339                        res = -ENODEV;
1340                        goto err;
1341                }
1342        }
1343
1344        dev->gpu_id = gpu_id;
1345        gpu->id = gpu_id;
1346
1347        /* TODO: Move the following lines to function
1348         *      kfd_add_non_crat_information
1349         */
1350
1351        /* Fill-in additional information that is not available in CRAT but
1352         * needed for the topology
1353         */
1354
1355        amdgpu_amdkfd_get_cu_info(dev->gpu->kgd, &cu_info);
1356
1357        strncpy(dev->node_props.name, gpu->device_info->asic_name,
1358                        KFD_TOPOLOGY_PUBLIC_NAME_SIZE);
1359
1360        dev->node_props.simd_arrays_per_engine =
1361                cu_info.num_shader_arrays_per_engine;
1362
1363        dev->node_props.vendor_id = gpu->pdev->vendor;
1364        dev->node_props.device_id = gpu->pdev->device;
1365        dev->node_props.capability |=
1366                ((amdgpu_amdkfd_get_asic_rev_id(dev->gpu->kgd) <<
1367                        HSA_CAP_ASIC_REVISION_SHIFT) &
1368                        HSA_CAP_ASIC_REVISION_MASK);
1369        dev->node_props.location_id = pci_dev_id(gpu->pdev);
1370        dev->node_props.domain = pci_domain_nr(gpu->pdev->bus);
1371        dev->node_props.max_engine_clk_fcompute =
1372                amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->kgd);
1373        dev->node_props.max_engine_clk_ccompute =
1374                cpufreq_quick_get_max(0) / 1000;
1375        dev->node_props.drm_render_minor =
1376                gpu->shared_resources.drm_render_minor;
1377
1378        dev->node_props.hive_id = gpu->hive_id;
1379        dev->node_props.num_sdma_engines = gpu->device_info->num_sdma_engines;
1380        dev->node_props.num_sdma_xgmi_engines =
1381                                gpu->device_info->num_xgmi_sdma_engines;
1382        dev->node_props.num_sdma_queues_per_engine =
1383                                gpu->device_info->num_sdma_queues_per_engine;
1384        dev->node_props.num_gws = (dev->gpu->gws &&
1385                dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ?
1386                amdgpu_amdkfd_get_num_gws(dev->gpu->kgd) : 0;
1387        dev->node_props.num_cp_queues = get_cp_queues_num(dev->gpu->dqm);
1388
1389        kfd_fill_mem_clk_max_info(dev);
1390        kfd_fill_iolink_non_crat_info(dev);
1391
1392        switch (dev->gpu->device_info->asic_family) {
1393        case CHIP_KAVERI:
1394        case CHIP_HAWAII:
1395        case CHIP_TONGA:
1396                dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_PRE_1_0 <<
1397                        HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
1398                        HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
1399                break;
1400        case CHIP_CARRIZO:
1401        case CHIP_FIJI:
1402        case CHIP_POLARIS10:
1403        case CHIP_POLARIS11:
1404        case CHIP_POLARIS12:
1405        case CHIP_VEGAM:
1406                pr_debug("Adding doorbell packet type capability\n");
1407                dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_1_0 <<
1408                        HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
1409                        HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
1410                break;
1411        case CHIP_VEGA10:
1412        case CHIP_VEGA12:
1413        case CHIP_VEGA20:
1414        case CHIP_RAVEN:
1415        case CHIP_RENOIR:
1416        case CHIP_ARCTURUS:
1417        case CHIP_ALDEBARAN:
1418        case CHIP_NAVI10:
1419        case CHIP_NAVI12:
1420        case CHIP_NAVI14:
1421        case CHIP_SIENNA_CICHLID:
1422        case CHIP_NAVY_FLOUNDER:
1423        case CHIP_VANGOGH:
1424        case CHIP_DIMGREY_CAVEFISH:
1425        case CHIP_BEIGE_GOBY:
1426        case CHIP_YELLOW_CARP:
1427                dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 <<
1428                        HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
1429                        HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
1430                break;
1431        default:
1432                WARN(1, "Unexpected ASIC family %u",
1433                     dev->gpu->device_info->asic_family);
1434        }
1435
1436        /*
1437        * Overwrite ATS capability according to needs_iommu_device to fix
1438        * potential missing corresponding bit in CRAT of BIOS.
1439        */
1440        if (dev->gpu->use_iommu_v2)
1441                dev->node_props.capability |= HSA_CAP_ATS_PRESENT;
1442        else
1443                dev->node_props.capability &= ~HSA_CAP_ATS_PRESENT;
1444
1445        /* Fix errors in CZ CRAT.
1446         * simd_count: Carrizo CRAT reports wrong simd_count, probably
1447         *              because it doesn't consider masked out CUs
1448         * max_waves_per_simd: Carrizo reports wrong max_waves_per_simd
1449         */
1450        if (dev->gpu->device_info->asic_family == CHIP_CARRIZO) {
1451                dev->node_props.simd_count =
1452                        cu_info.simd_per_cu * cu_info.cu_active_number;
1453                dev->node_props.max_waves_per_simd = 10;
1454        }
1455
1456        adev = (struct amdgpu_device *)(dev->gpu->kgd);
1457        /* kfd only concerns sram ecc on GFX and HBM ecc on UMC */
1458        dev->node_props.capability |=
1459                ((adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ?
1460                HSA_CAP_SRAM_EDCSUPPORTED : 0;
1461        dev->node_props.capability |= ((adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ?
1462                HSA_CAP_MEM_EDCSUPPORTED : 0;
1463
1464        if (adev->asic_type != CHIP_VEGA10)
1465                dev->node_props.capability |= (adev->ras_enabled != 0) ?
1466                        HSA_CAP_RASEVENTNOTIFY : 0;
1467
1468        if (KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev))
1469                dev->node_props.capability |= HSA_CAP_SVMAPI_SUPPORTED;
1470
1471        kfd_debug_print_topology();
1472
1473        if (!res)
1474                kfd_notify_gpu_change(gpu_id, 1);
1475err:
1476        kfd_destroy_crat_image(crat_image);
1477        return res;
1478}
1479
1480int kfd_topology_remove_device(struct kfd_dev *gpu)
1481{
1482        struct kfd_topology_device *dev, *tmp;
1483        uint32_t gpu_id;
1484        int res = -ENODEV;
1485
1486        down_write(&topology_lock);
1487
1488        list_for_each_entry_safe(dev, tmp, &topology_device_list, list)
1489                if (dev->gpu == gpu) {
1490                        gpu_id = dev->gpu_id;
1491                        kfd_remove_sysfs_node_entry(dev);
1492                        kfd_release_topology_device(dev);
1493                        sys_props.num_devices--;
1494                        res = 0;
1495                        if (kfd_topology_update_sysfs() < 0)
1496                                kfd_topology_release_sysfs();
1497                        break;
1498                }
1499
1500        up_write(&topology_lock);
1501
1502        if (!res)
1503                kfd_notify_gpu_change(gpu_id, 0);
1504
1505        return res;
1506}
1507
1508/* kfd_topology_enum_kfd_devices - Enumerate through all devices in KFD
1509 *      topology. If GPU device is found @idx, then valid kfd_dev pointer is
1510 *      returned through @kdev
1511 * Return -     0: On success (@kdev will be NULL for non GPU nodes)
1512 *              -1: If end of list
1513 */
1514int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev)
1515{
1516
1517        struct kfd_topology_device *top_dev;
1518        uint8_t device_idx = 0;
1519
1520        *kdev = NULL;
1521        down_read(&topology_lock);
1522
1523        list_for_each_entry(top_dev, &topology_device_list, list) {
1524                if (device_idx == idx) {
1525                        *kdev = top_dev->gpu;
1526                        up_read(&topology_lock);
1527                        return 0;
1528                }
1529
1530                device_idx++;
1531        }
1532
1533        up_read(&topology_lock);
1534
1535        return -1;
1536
1537}
1538
1539static int kfd_cpumask_to_apic_id(const struct cpumask *cpumask)
1540{
1541        int first_cpu_of_numa_node;
1542
1543        if (!cpumask || cpumask == cpu_none_mask)
1544                return -1;
1545        first_cpu_of_numa_node = cpumask_first(cpumask);
1546        if (first_cpu_of_numa_node >= nr_cpu_ids)
1547                return -1;
1548#ifdef CONFIG_X86_64
1549        return cpu_data(first_cpu_of_numa_node).apicid;
1550#else
1551        return first_cpu_of_numa_node;
1552#endif
1553}
1554
1555/* kfd_numa_node_to_apic_id - Returns the APIC ID of the first logical processor
1556 *      of the given NUMA node (numa_node_id)
1557 * Return -1 on failure
1558 */
1559int kfd_numa_node_to_apic_id(int numa_node_id)
1560{
1561        if (numa_node_id == -1) {
1562                pr_warn("Invalid NUMA Node. Use online CPU mask\n");
1563                return kfd_cpumask_to_apic_id(cpu_online_mask);
1564        }
1565        return kfd_cpumask_to_apic_id(cpumask_of_node(numa_node_id));
1566}
1567
1568void kfd_double_confirm_iommu_support(struct kfd_dev *gpu)
1569{
1570        struct kfd_topology_device *dev;
1571
1572        gpu->use_iommu_v2 = false;
1573
1574        if (!gpu->device_info->needs_iommu_device)
1575                return;
1576
1577        down_read(&topology_lock);
1578
1579        /* Only use IOMMUv2 if there is an APU topology node with no GPU
1580         * assigned yet. This GPU will be assigned to it.
1581         */
1582        list_for_each_entry(dev, &topology_device_list, list)
1583                if (dev->node_props.cpu_cores_count &&
1584                    dev->node_props.simd_count &&
1585                    !dev->gpu)
1586                        gpu->use_iommu_v2 = true;
1587
1588        up_read(&topology_lock);
1589}
1590
1591#if defined(CONFIG_DEBUG_FS)
1592
1593int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data)
1594{
1595        struct kfd_topology_device *dev;
1596        unsigned int i = 0;
1597        int r = 0;
1598
1599        down_read(&topology_lock);
1600
1601        list_for_each_entry(dev, &topology_device_list, list) {
1602                if (!dev->gpu) {
1603                        i++;
1604                        continue;
1605                }
1606
1607                seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id);
1608                r = dqm_debugfs_hqds(m, dev->gpu->dqm);
1609                if (r)
1610                        break;
1611        }
1612
1613        up_read(&topology_lock);
1614
1615        return r;
1616}
1617
1618int kfd_debugfs_rls_by_device(struct seq_file *m, void *data)
1619{
1620        struct kfd_topology_device *dev;
1621        unsigned int i = 0;
1622        int r = 0;
1623
1624        down_read(&topology_lock);
1625
1626        list_for_each_entry(dev, &topology_device_list, list) {
1627                if (!dev->gpu) {
1628                        i++;
1629                        continue;
1630                }
1631
1632                seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id);
1633                r = pm_debugfs_runlist(m, &dev->gpu->dqm->packets);
1634                if (r)
1635                        break;
1636        }
1637
1638        up_read(&topology_lock);
1639
1640        return r;
1641}
1642
1643#endif
1644