linux/arch/x86/kernel/cpu/resctrl/internal.h
<<
>>
Prefs
   1/* SPDX-License-Identifier: GPL-2.0 */
   2#ifndef _ASM_X86_RESCTRL_INTERNAL_H
   3#define _ASM_X86_RESCTRL_INTERNAL_H
   4
   5#include <linux/resctrl.h>
   6#include <linux/sched.h>
   7#include <linux/kernfs.h>
   8#include <linux/fs_context.h>
   9#include <linux/jump_label.h>
  10
  11#define MSR_IA32_L3_QOS_CFG             0xc81
  12#define MSR_IA32_L2_QOS_CFG             0xc82
  13#define MSR_IA32_L3_CBM_BASE            0xc90
  14#define MSR_IA32_L2_CBM_BASE            0xd10
  15#define MSR_IA32_MBA_THRTL_BASE         0xd50
  16#define MSR_IA32_MBA_BW_BASE            0xc0000200
  17
  18#define MSR_IA32_QM_CTR                 0x0c8e
  19#define MSR_IA32_QM_EVTSEL              0x0c8d
  20
  21#define L3_QOS_CDP_ENABLE               0x01ULL
  22
  23#define L2_QOS_CDP_ENABLE               0x01ULL
  24
  25/*
  26 * Event IDs are used to program IA32_QM_EVTSEL before reading event
  27 * counter from IA32_QM_CTR
  28 */
  29#define QOS_L3_OCCUP_EVENT_ID           0x01
  30#define QOS_L3_MBM_TOTAL_EVENT_ID       0x02
  31#define QOS_L3_MBM_LOCAL_EVENT_ID       0x03
  32
  33#define CQM_LIMBOCHECK_INTERVAL 1000
  34
  35#define MBM_CNTR_WIDTH_BASE             24
  36#define MBM_OVERFLOW_INTERVAL           1000
  37#define MAX_MBA_BW                      100u
  38#define MBA_IS_LINEAR                   0x4
  39#define MBA_MAX_MBPS                    U32_MAX
  40#define MAX_MBA_BW_AMD                  0x800
  41#define MBM_CNTR_WIDTH_OFFSET_AMD       20
  42
  43#define RMID_VAL_ERROR                  BIT_ULL(63)
  44#define RMID_VAL_UNAVAIL                BIT_ULL(62)
  45/*
  46 * With the above fields in use 62 bits remain in MSR_IA32_QM_CTR for
  47 * data to be returned. The counter width is discovered from the hardware
  48 * as an offset from MBM_CNTR_WIDTH_BASE.
  49 */
  50#define MBM_CNTR_WIDTH_OFFSET_MAX (62 - MBM_CNTR_WIDTH_BASE)
  51
  52
  53struct rdt_fs_context {
  54        struct kernfs_fs_context        kfc;
  55        bool                            enable_cdpl2;
  56        bool                            enable_cdpl3;
  57        bool                            enable_mba_mbps;
  58};
  59
  60static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc)
  61{
  62        struct kernfs_fs_context *kfc = fc->fs_private;
  63
  64        return container_of(kfc, struct rdt_fs_context, kfc);
  65}
  66
  67DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
  68DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_key);
  69
  70/**
  71 * struct mon_evt - Entry in the event list of a resource
  72 * @evtid:              event id
  73 * @name:               name of the event
  74 * @list:               entry in &rdt_resource->evt_list
  75 */
  76struct mon_evt {
  77        u32                     evtid;
  78        char                    *name;
  79        struct list_head        list;
  80};
  81
  82/**
  83 * union mon_data_bits - Monitoring details for each event file
  84 * @priv:              Used to store monitoring event data in @u
  85 *                     as kernfs private data
  86 * @rid:               Resource id associated with the event file
  87 * @evtid:             Event id associated with the event file
  88 * @domid:             The domain to which the event file belongs
  89 * @u:                 Name of the bit fields struct
  90 */
  91union mon_data_bits {
  92        void *priv;
  93        struct {
  94                unsigned int rid        : 10;
  95                unsigned int evtid      : 8;
  96                unsigned int domid      : 14;
  97        } u;
  98};
  99
 100struct rmid_read {
 101        struct rdtgroup         *rgrp;
 102        struct rdt_resource     *r;
 103        struct rdt_domain       *d;
 104        int                     evtid;
 105        bool                    first;
 106        u64                     val;
 107};
 108
 109extern unsigned int resctrl_cqm_threshold;
 110extern bool rdt_alloc_capable;
 111extern bool rdt_mon_capable;
 112extern unsigned int rdt_mon_features;
 113extern struct list_head resctrl_schema_all;
 114
 115enum rdt_group_type {
 116        RDTCTRL_GROUP = 0,
 117        RDTMON_GROUP,
 118        RDT_NUM_GROUP,
 119};
 120
 121/**
 122 * enum rdtgrp_mode - Mode of a RDT resource group
 123 * @RDT_MODE_SHAREABLE: This resource group allows sharing of its allocations
 124 * @RDT_MODE_EXCLUSIVE: No sharing of this resource group's allocations allowed
 125 * @RDT_MODE_PSEUDO_LOCKSETUP: Resource group will be used for Pseudo-Locking
 126 * @RDT_MODE_PSEUDO_LOCKED: No sharing of this resource group's allocations
 127 *                          allowed AND the allocations are Cache Pseudo-Locked
 128 * @RDT_NUM_MODES: Total number of modes
 129 *
 130 * The mode of a resource group enables control over the allowed overlap
 131 * between allocations associated with different resource groups (classes
 132 * of service). User is able to modify the mode of a resource group by
 133 * writing to the "mode" resctrl file associated with the resource group.
 134 *
 135 * The "shareable", "exclusive", and "pseudo-locksetup" modes are set by
 136 * writing the appropriate text to the "mode" file. A resource group enters
 137 * "pseudo-locked" mode after the schemata is written while the resource
 138 * group is in "pseudo-locksetup" mode.
 139 */
 140enum rdtgrp_mode {
 141        RDT_MODE_SHAREABLE = 0,
 142        RDT_MODE_EXCLUSIVE,
 143        RDT_MODE_PSEUDO_LOCKSETUP,
 144        RDT_MODE_PSEUDO_LOCKED,
 145
 146        /* Must be last */
 147        RDT_NUM_MODES,
 148};
 149
 150/**
 151 * struct mongroup - store mon group's data in resctrl fs.
 152 * @mon_data_kn:                kernfs node for the mon_data directory
 153 * @parent:                     parent rdtgrp
 154 * @crdtgrp_list:               child rdtgroup node list
 155 * @rmid:                       rmid for this rdtgroup
 156 */
 157struct mongroup {
 158        struct kernfs_node      *mon_data_kn;
 159        struct rdtgroup         *parent;
 160        struct list_head        crdtgrp_list;
 161        u32                     rmid;
 162};
 163
 164/**
 165 * struct pseudo_lock_region - pseudo-lock region information
 166 * @s:                  Resctrl schema for the resource to which this
 167 *                      pseudo-locked region belongs
 168 * @d:                  RDT domain to which this pseudo-locked region
 169 *                      belongs
 170 * @cbm:                bitmask of the pseudo-locked region
 171 * @lock_thread_wq:     waitqueue used to wait on the pseudo-locking thread
 172 *                      completion
 173 * @thread_done:        variable used by waitqueue to test if pseudo-locking
 174 *                      thread completed
 175 * @cpu:                core associated with the cache on which the setup code
 176 *                      will be run
 177 * @line_size:          size of the cache lines
 178 * @size:               size of pseudo-locked region in bytes
 179 * @kmem:               the kernel memory associated with pseudo-locked region
 180 * @minor:              minor number of character device associated with this
 181 *                      region
 182 * @debugfs_dir:        pointer to this region's directory in the debugfs
 183 *                      filesystem
 184 * @pm_reqs:            Power management QoS requests related to this region
 185 */
 186struct pseudo_lock_region {
 187        struct resctrl_schema   *s;
 188        struct rdt_domain       *d;
 189        u32                     cbm;
 190        wait_queue_head_t       lock_thread_wq;
 191        int                     thread_done;
 192        int                     cpu;
 193        unsigned int            line_size;
 194        unsigned int            size;
 195        void                    *kmem;
 196        unsigned int            minor;
 197        struct dentry           *debugfs_dir;
 198        struct list_head        pm_reqs;
 199};
 200
 201/**
 202 * struct rdtgroup - store rdtgroup's data in resctrl file system.
 203 * @kn:                         kernfs node
 204 * @rdtgroup_list:              linked list for all rdtgroups
 205 * @closid:                     closid for this rdtgroup
 206 * @cpu_mask:                   CPUs assigned to this rdtgroup
 207 * @flags:                      status bits
 208 * @waitcount:                  how many cpus expect to find this
 209 *                              group when they acquire rdtgroup_mutex
 210 * @type:                       indicates type of this rdtgroup - either
 211 *                              monitor only or ctrl_mon group
 212 * @mon:                        mongroup related data
 213 * @mode:                       mode of resource group
 214 * @plr:                        pseudo-locked region
 215 */
 216struct rdtgroup {
 217        struct kernfs_node              *kn;
 218        struct list_head                rdtgroup_list;
 219        u32                             closid;
 220        struct cpumask                  cpu_mask;
 221        int                             flags;
 222        atomic_t                        waitcount;
 223        enum rdt_group_type             type;
 224        struct mongroup                 mon;
 225        enum rdtgrp_mode                mode;
 226        struct pseudo_lock_region       *plr;
 227};
 228
 229/* rdtgroup.flags */
 230#define RDT_DELETED             1
 231
 232/* rftype.flags */
 233#define RFTYPE_FLAGS_CPUS_LIST  1
 234
 235/*
 236 * Define the file type flags for base and info directories.
 237 */
 238#define RFTYPE_INFO                     BIT(0)
 239#define RFTYPE_BASE                     BIT(1)
 240#define RF_CTRLSHIFT                    4
 241#define RF_MONSHIFT                     5
 242#define RF_TOPSHIFT                     6
 243#define RFTYPE_CTRL                     BIT(RF_CTRLSHIFT)
 244#define RFTYPE_MON                      BIT(RF_MONSHIFT)
 245#define RFTYPE_TOP                      BIT(RF_TOPSHIFT)
 246#define RFTYPE_RES_CACHE                BIT(8)
 247#define RFTYPE_RES_MB                   BIT(9)
 248#define RF_CTRL_INFO                    (RFTYPE_INFO | RFTYPE_CTRL)
 249#define RF_MON_INFO                     (RFTYPE_INFO | RFTYPE_MON)
 250#define RF_TOP_INFO                     (RFTYPE_INFO | RFTYPE_TOP)
 251#define RF_CTRL_BASE                    (RFTYPE_BASE | RFTYPE_CTRL)
 252
 253/* List of all resource groups */
 254extern struct list_head rdt_all_groups;
 255
 256extern int max_name_width, max_data_width;
 257
 258int __init rdtgroup_init(void);
 259void __exit rdtgroup_exit(void);
 260
 261/**
 262 * struct rftype - describe each file in the resctrl file system
 263 * @name:       File name
 264 * @mode:       Access mode
 265 * @kf_ops:     File operations
 266 * @flags:      File specific RFTYPE_FLAGS_* flags
 267 * @fflags:     File specific RF_* or RFTYPE_* flags
 268 * @seq_show:   Show content of the file
 269 * @write:      Write to the file
 270 */
 271struct rftype {
 272        char                    *name;
 273        umode_t                 mode;
 274        const struct kernfs_ops *kf_ops;
 275        unsigned long           flags;
 276        unsigned long           fflags;
 277
 278        int (*seq_show)(struct kernfs_open_file *of,
 279                        struct seq_file *sf, void *v);
 280        /*
 281         * write() is the generic write callback which maps directly to
 282         * kernfs write operation and overrides all other operations.
 283         * Maximum write size is determined by ->max_write_len.
 284         */
 285        ssize_t (*write)(struct kernfs_open_file *of,
 286                         char *buf, size_t nbytes, loff_t off);
 287};
 288
 289/**
 290 * struct mbm_state - status for each MBM counter in each domain
 291 * @chunks:     Total data moved (multiply by rdt_group.mon_scale to get bytes)
 292 * @prev_msr:   Value of IA32_QM_CTR for this RMID last time we read it
 293 * @prev_bw_msr:Value of previous IA32_QM_CTR for bandwidth counting
 294 * @prev_bw:    The most recent bandwidth in MBps
 295 * @delta_bw:   Difference between the current and previous bandwidth
 296 * @delta_comp: Indicates whether to compute the delta_bw
 297 */
 298struct mbm_state {
 299        u64     chunks;
 300        u64     prev_msr;
 301        u64     prev_bw_msr;
 302        u32     prev_bw;
 303        u32     delta_bw;
 304        bool    delta_comp;
 305};
 306
 307/**
 308 * struct rdt_hw_domain - Arch private attributes of a set of CPUs that share
 309 *                        a resource
 310 * @d_resctrl:  Properties exposed to the resctrl file system
 311 * @ctrl_val:   array of cache or mem ctrl values (indexed by CLOSID)
 312 * @mbps_val:   When mba_sc is enabled, this holds the bandwidth in MBps
 313 *
 314 * Members of this structure are accessed via helpers that provide abstraction.
 315 */
 316struct rdt_hw_domain {
 317        struct rdt_domain               d_resctrl;
 318        u32                             *ctrl_val;
 319        u32                             *mbps_val;
 320};
 321
 322static inline struct rdt_hw_domain *resctrl_to_arch_dom(struct rdt_domain *r)
 323{
 324        return container_of(r, struct rdt_hw_domain, d_resctrl);
 325}
 326
 327/**
 328 * struct msr_param - set a range of MSRs from a domain
 329 * @res:       The resource to use
 330 * @low:       Beginning index from base MSR
 331 * @high:      End index
 332 */
 333struct msr_param {
 334        struct rdt_resource     *res;
 335        u32                     low;
 336        u32                     high;
 337};
 338
 339static inline bool is_llc_occupancy_enabled(void)
 340{
 341        return (rdt_mon_features & (1 << QOS_L3_OCCUP_EVENT_ID));
 342}
 343
 344static inline bool is_mbm_total_enabled(void)
 345{
 346        return (rdt_mon_features & (1 << QOS_L3_MBM_TOTAL_EVENT_ID));
 347}
 348
 349static inline bool is_mbm_local_enabled(void)
 350{
 351        return (rdt_mon_features & (1 << QOS_L3_MBM_LOCAL_EVENT_ID));
 352}
 353
 354static inline bool is_mbm_enabled(void)
 355{
 356        return (is_mbm_total_enabled() || is_mbm_local_enabled());
 357}
 358
 359static inline bool is_mbm_event(int e)
 360{
 361        return (e >= QOS_L3_MBM_TOTAL_EVENT_ID &&
 362                e <= QOS_L3_MBM_LOCAL_EVENT_ID);
 363}
 364
 365struct rdt_parse_data {
 366        struct rdtgroup         *rdtgrp;
 367        char                    *buf;
 368};
 369
 370/**
 371 * struct rdt_hw_resource - arch private attributes of a resctrl resource
 372 * @r_resctrl:          Attributes of the resource used directly by resctrl.
 373 * @num_closid:         Maximum number of closid this hardware can support,
 374 *                      regardless of CDP. This is exposed via
 375 *                      resctrl_arch_get_num_closid() to avoid confusion
 376 *                      with struct resctrl_schema's property of the same name,
 377 *                      which has been corrected for features like CDP.
 378 * @msr_base:           Base MSR address for CBMs
 379 * @msr_update:         Function pointer to update QOS MSRs
 380 * @mon_scale:          cqm counter * mon_scale = occupancy in bytes
 381 * @mbm_width:          Monitor width, to detect and correct for overflow.
 382 * @cdp_enabled:        CDP state of this resource
 383 *
 384 * Members of this structure are either private to the architecture
 385 * e.g. mbm_width, or accessed via helpers that provide abstraction. e.g.
 386 * msr_update and msr_base.
 387 */
 388struct rdt_hw_resource {
 389        struct rdt_resource     r_resctrl;
 390        u32                     num_closid;
 391        unsigned int            msr_base;
 392        void (*msr_update)      (struct rdt_domain *d, struct msr_param *m,
 393                                 struct rdt_resource *r);
 394        unsigned int            mon_scale;
 395        unsigned int            mbm_width;
 396        bool                    cdp_enabled;
 397};
 398
 399static inline struct rdt_hw_resource *resctrl_to_arch_res(struct rdt_resource *r)
 400{
 401        return container_of(r, struct rdt_hw_resource, r_resctrl);
 402}
 403
 404int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s,
 405              struct rdt_domain *d);
 406int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s,
 407             struct rdt_domain *d);
 408
 409extern struct mutex rdtgroup_mutex;
 410
 411extern struct rdt_hw_resource rdt_resources_all[];
 412extern struct rdtgroup rdtgroup_default;
 413DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
 414
 415extern struct dentry *debugfs_resctrl;
 416
 417enum resctrl_res_level {
 418        RDT_RESOURCE_L3,
 419        RDT_RESOURCE_L2,
 420        RDT_RESOURCE_MBA,
 421
 422        /* Must be the last */
 423        RDT_NUM_RESOURCES,
 424};
 425
 426static inline struct rdt_resource *resctrl_inc(struct rdt_resource *res)
 427{
 428        struct rdt_hw_resource *hw_res = resctrl_to_arch_res(res);
 429
 430        hw_res++;
 431        return &hw_res->r_resctrl;
 432}
 433
 434static inline bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level l)
 435{
 436        return rdt_resources_all[l].cdp_enabled;
 437}
 438
 439int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable);
 440
 441/*
 442 * To return the common struct rdt_resource, which is contained in struct
 443 * rdt_hw_resource, walk the resctrl member of struct rdt_hw_resource.
 444 */
 445#define for_each_rdt_resource(r)                                              \
 446        for (r = &rdt_resources_all[0].r_resctrl;                             \
 447             r <= &rdt_resources_all[RDT_NUM_RESOURCES - 1].r_resctrl;        \
 448             r = resctrl_inc(r))
 449
 450#define for_each_capable_rdt_resource(r)                                      \
 451        for_each_rdt_resource(r)                                              \
 452                if (r->alloc_capable || r->mon_capable)
 453
 454#define for_each_alloc_capable_rdt_resource(r)                                \
 455        for_each_rdt_resource(r)                                              \
 456                if (r->alloc_capable)
 457
 458#define for_each_mon_capable_rdt_resource(r)                                  \
 459        for_each_rdt_resource(r)                                              \
 460                if (r->mon_capable)
 461
 462#define for_each_alloc_enabled_rdt_resource(r)                                \
 463        for_each_rdt_resource(r)                                              \
 464                if (r->alloc_enabled)
 465
 466#define for_each_mon_enabled_rdt_resource(r)                                  \
 467        for_each_rdt_resource(r)                                              \
 468                if (r->mon_enabled)
 469
 470/* CPUID.(EAX=10H, ECX=ResID=1).EAX */
 471union cpuid_0x10_1_eax {
 472        struct {
 473                unsigned int cbm_len:5;
 474        } split;
 475        unsigned int full;
 476};
 477
 478/* CPUID.(EAX=10H, ECX=ResID=3).EAX */
 479union cpuid_0x10_3_eax {
 480        struct {
 481                unsigned int max_delay:12;
 482        } split;
 483        unsigned int full;
 484};
 485
 486/* CPUID.(EAX=10H, ECX=ResID).EDX */
 487union cpuid_0x10_x_edx {
 488        struct {
 489                unsigned int cos_max:16;
 490        } split;
 491        unsigned int full;
 492};
 493
 494void rdt_last_cmd_clear(void);
 495void rdt_last_cmd_puts(const char *s);
 496__printf(1, 2)
 497void rdt_last_cmd_printf(const char *fmt, ...);
 498
 499void rdt_ctrl_update(void *arg);
 500struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn);
 501void rdtgroup_kn_unlock(struct kernfs_node *kn);
 502int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name);
 503int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name,
 504                             umode_t mask);
 505struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
 506                                   struct list_head **pos);
 507ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
 508                                char *buf, size_t nbytes, loff_t off);
 509int rdtgroup_schemata_show(struct kernfs_open_file *of,
 510                           struct seq_file *s, void *v);
 511bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d,
 512                           unsigned long cbm, int closid, bool exclusive);
 513unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_domain *d,
 514                                  unsigned long cbm);
 515enum rdtgrp_mode rdtgroup_mode_by_closid(int closid);
 516int rdtgroup_tasks_assigned(struct rdtgroup *r);
 517int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp);
 518int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp);
 519bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm);
 520bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d);
 521int rdt_pseudo_lock_init(void);
 522void rdt_pseudo_lock_release(void);
 523int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp);
 524void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp);
 525struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r);
 526int closids_supported(void);
 527void closid_free(int closid);
 528int alloc_rmid(void);
 529void free_rmid(u32 rmid);
 530int rdt_get_mon_l3_config(struct rdt_resource *r);
 531void mon_event_count(void *info);
 532int rdtgroup_mondata_show(struct seq_file *m, void *arg);
 533void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
 534                                    unsigned int dom_id);
 535void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
 536                                    struct rdt_domain *d);
 537void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
 538                    struct rdt_domain *d, struct rdtgroup *rdtgrp,
 539                    int evtid, int first);
 540void mbm_setup_overflow_handler(struct rdt_domain *dom,
 541                                unsigned long delay_ms);
 542void mbm_handle_overflow(struct work_struct *work);
 543void __init intel_rdt_mbm_apply_quirk(void);
 544bool is_mba_sc(struct rdt_resource *r);
 545void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm);
 546u32 delay_bw_map(unsigned long bw, struct rdt_resource *r);
 547void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms);
 548void cqm_handle_limbo(struct work_struct *work);
 549bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d);
 550void __check_limbo(struct rdt_domain *d, bool force_free);
 551void rdt_domain_reconfigure_cdp(struct rdt_resource *r);
 552void __init thread_throttle_mode_init(void);
 553
 554#endif /* _ASM_X86_RESCTRL_INTERNAL_H */
 555