dpdk/lib/sched/rte_sched.h
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#ifndef __INCLUDE_RTE_SCHED_H__
#define __INCLUDE_RTE_SCHED_H__

#ifdef __cplusplus
extern "C" {
#endif

/**
 * @file
 * RTE Hierarchical Scheduler
 *
 * The hierarchical scheduler prioritizes the transmission of packets
 * from different users and traffic classes according to the Service
 * Level Agreements (SLAs) defined for the current network node.
 *
 * The scheduler supports thousands of packet queues grouped under a
 * 5-level hierarchy:
 *     1. Port:
 *           - Typical usage: output Ethernet port;
 *           - Multiple ports are scheduled in round robin order with
 *             equal priority;
 *     2. Subport:
 *           - Typical usage: group of users;
 *           - Traffic shaping using the token bucket algorithm
 *             (one bucket per subport);
 *           - Upper limit enforced per traffic class at subport level;
 *           - Lower priority traffic classes able to reuse subport
 *             bandwidth currently unused by higher priority traffic
 *             classes of the same subport;
 *           - When any subport traffic class is oversubscribed
 *             (configuration time event), the usage of subport member
 *             pipes with high demand for that traffic class is
 *             truncated to a dynamically adjusted value with no
 *             impact to low demand pipes;
 *     3. Pipe:
 *           - Typical usage: individual user/subscriber;
 *           - Traffic shaping using the token bucket algorithm
 *             (one bucket per pipe);
 *     4. Traffic class:
 *           - Traffic classes of the same pipe handled in strict
 *             priority order;
 *           - Upper limit enforced per traffic class at the pipe level;
 *           - Lower priority traffic classes able to reuse pipe
 *             bandwidth currently unused by higher priority traffic
 *             classes of the same pipe;
 *     5. Queue:
 *           - Typical usage: queue hosting packets from one or
 *             multiple connections of the same traffic class belonging
 *             to the same user;
 *           - Weighted Round Robin (WRR) is used to service the
 *             queues within the lowest priority (best-effort) traffic
 *             class of the same pipe.
 */

#include <rte_compat.h>
#include <rte_mbuf.h>
#include <rte_meter.h>

/** Congestion Management */
#include "rte_red.h"
#include "rte_pie.h"

/** Maximum number of queues per pipe.
 * Note that multiple queues (a power of 2) can only be assigned to the
 * lowest priority (best-effort) traffic class; every other, higher
 * priority traffic class has exactly one queue.
 * Cannot be changed.
 *
 * @see struct rte_sched_port_params
 */
#define RTE_SCHED_QUEUES_PER_PIPE    16

/** Number of WRR queues for best-effort traffic class per pipe.
 *
 * @see struct rte_sched_pipe_params
 */
#define RTE_SCHED_BE_QUEUES_PER_PIPE    4

/** Number of traffic classes per pipe (as well as subport).
 * @see struct rte_sched_subport_params
 * @see struct rte_sched_pipe_params
 */
#define RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE    \
(RTE_SCHED_QUEUES_PER_PIPE - RTE_SCHED_BE_QUEUES_PER_PIPE + 1)

/** Best-effort traffic class ID
 * Cannot be changed.
 */
#define RTE_SCHED_TRAFFIC_CLASS_BE    (RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE - 1)
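
/*
 * Worked example of the constants above (derived values, not new
 * configuration):
 *
 *   RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE = 16 - 4 + 1 = 13
 *   RTE_SCHED_TRAFFIC_CLASS_BE         = 13 - 1     = 12
 *
 * Traffic classes 0..11 own one queue each (12 queues), and the
 * best-effort traffic class 12 owns the 4 WRR queues, for a total of
 * 16 queues per pipe.
 */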

/*
 * Ethernet framing overhead. Overhead fields per Ethernet frame:
 * 1. Preamble:                             7 bytes;
 * 2. Start of Frame Delimiter (SFD):       1 byte;
 * 3. Frame Check Sequence (FCS):           4 bytes;
 * 4. Inter Frame Gap (IFG):               12 bytes.
 *
 * The FCS is considered overhead only if not included in the packet
 * length (field pkt_len of struct rte_mbuf).
 *
 * @see struct rte_sched_port_params
 */
#ifndef RTE_SCHED_FRAME_OVERHEAD_DEFAULT
#define RTE_SCHED_FRAME_OVERHEAD_DEFAULT      24
#endif
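
/*
 * The default value is simply the sum of the four fields listed above:
 * 7 (Preamble) + 1 (SFD) + 4 (FCS) + 12 (IFG) = 24 bytes. If pkt_len
 * already accounts for the FCS, the appropriate overhead is
 * 24 - 4 = 20 bytes, which can be supplied through
 * struct rte_sched_port_params::frame_overhead.
 */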

/**
 * Congestion Management (CMAN) mode
 *
 * This is used for controlling the admission of packets into a packet queue
 * or group of packet queues on congestion.
 *
 * The *Random Early Detection (RED)* algorithm works by proactively dropping
 * more and more input packets as the queue occupancy builds up. When the queue
 * is full or almost full, RED effectively works as *tail drop*. The *Weighted
 * RED* algorithm uses a separate set of RED thresholds for each packet color.
 *
 * Similar to RED, Proportional Integral Controller Enhanced (PIE) randomly
 * drops a packet at the onset of congestion and tries to keep the latency
 * around a target value. The congestion detection, however, is based on the
 * queueing latency instead of the queue length as in RED. For more
 * information, refer to RFC 8033.
 */
enum rte_sched_cman_mode {
        RTE_SCHED_CMAN_RED, /**< Random Early Detection (RED) */
        RTE_SCHED_CMAN_PIE, /**< Proportional Integral Controller Enhanced (PIE) */
};

/**
 * Pipe configuration parameters. The period and credits_per_period
 * parameters are measured in bytes, with one byte meaning the time
 * duration associated with the transmission of one byte on the
 * physical medium of the output port, with the pipe or pipe traffic
 * class rate (measured as a percentage of the output port rate)
 * determined as credits_per_period divided by period. One credit
 * represents one byte.
 */
struct rte_sched_pipe_params {
        /** Token bucket rate (measured in bytes per second) */
        uint64_t tb_rate;

        /** Token bucket size (measured in credits) */
        uint64_t tb_size;

        /** Traffic class rates (measured in bytes per second) */
        uint64_t tc_rate[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

        /** Enforcement period (measured in milliseconds) */
        uint64_t tc_period;

        /** Best-effort traffic class oversubscription weight */
        uint8_t tc_ov_weight;

        /** WRR weights of best-effort traffic class queues */
        uint8_t wrr_weights[RTE_SCHED_BE_QUEUES_PER_PIPE];
};
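
/*
 * Example: a minimal sketch of a pipe profile definition. All numeric
 * values below are illustrative placeholders, not recommendations.
 *
 * @code{.c}
 * static struct rte_sched_pipe_params pipe_profile = {
 *         .tb_rate = 1250000,      // ~10 Mbps expressed in bytes/s
 *         .tb_size = 1000000,      // bucket depth in credits (bytes)
 *         .tc_rate = {             // per-TC caps, all equal here (GNU C range initializer)
 *                 [0 ... RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE - 1] = 1250000,
 *         },
 *         .tc_period = 40,         // enforcement period in ms
 *         .tc_ov_weight = 1,
 *         .wrr_weights = {1, 1, 1, 1},
 * };
 * @endcode
 */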

/**
 * Congestion Management configuration parameters.
 */
struct rte_sched_cman_params {
        /** Congestion Management mode */
        enum rte_sched_cman_mode cman_mode;

        union {
                /** RED parameters */
                struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS];

                /** PIE parameters */
                struct rte_pie_params pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
        };
};
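
/*
 * Example: a hedged sketch of a RED configuration that applies the same
 * thresholds to every traffic class and packet color. The field names come
 * from struct rte_red_params in rte_red.h; the values are illustrative only.
 *
 * @code{.c}
 * static struct rte_sched_cman_params cman_params = {
 *         .cman_mode = RTE_SCHED_CMAN_RED,
 *         .red_params = {
 *                 [0 ... RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE - 1] = {
 *                         [0 ... RTE_COLORS - 1] = {
 *                                 .min_th = 48,    // queue level to start dropping
 *                                 .max_th = 64,    // queue level for max drop rate
 *                                 .maxp_inv = 10,  // inverse of max drop probability
 *                                 .wq_log2 = 9,    // EWMA filter weight, log2
 *                         },
 *                 },
 *         },
 * };
 * @endcode
 */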

/**
 * Subport configuration parameters. The period and credits_per_period
 * parameters are measured in bytes, with one byte meaning the time
 * duration associated with the transmission of one byte on the
 * physical medium of the output port, with the subport or subport
 * traffic class rate (measured as a percentage of the output port rate)
 * determined as credits_per_period divided by period. One credit
 * represents one byte.
 */
struct rte_sched_subport_params {
        /** Number of subport pipes.
         * The subport can enable/allocate fewer pipes than the maximum
         * number set through struct rte_sched_port_params::n_pipes_per_subport,
         * as needed, to avoid memory allocation for the queues of the
         * pipes that are not really needed.
         */
        uint32_t n_pipes_per_subport_enabled;

        /** Packet queue size for each traffic class.
         * All the pipes within the same subport share the same
         * configuration for the queues.
         */
        uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

        /** Pipe profile table.
         * Every pipe is configured using one of the profiles from this table.
         */
        struct rte_sched_pipe_params *pipe_profiles;

        /** Number of profiles in the pipe profile table */
        uint32_t n_pipe_profiles;

        /** Max allowed profiles in the pipe profile table */
        uint32_t n_max_pipe_profiles;

        /** Congestion Management parameters.
         * If NULL, congestion management is disabled for the subport;
         * otherwise proper parameters need to be provided.
         */
        struct rte_sched_cman_params *cman_params;
};

/** Subport bandwidth profile parameters */
struct rte_sched_subport_profile_params {
        /** Token bucket rate (measured in bytes per second) */
        uint64_t tb_rate;

        /** Token bucket size (measured in credits) */
        uint64_t tb_size;

        /** Traffic class rates (measured in bytes per second) */
        uint64_t tc_rate[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

        /** Enforcement period for rates (measured in milliseconds) */
        uint64_t tc_period;
};
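
/*
 * Example: a minimal sketch of a subport bandwidth profile together with
 * subport parameters that reference the pipe profile sketched earlier.
 * All numeric values are illustrative placeholders.
 *
 * @code{.c}
 * static struct rte_sched_subport_profile_params subport_profile = {
 *         .tb_rate = 125000000,    // ~1 Gbps expressed in bytes/s
 *         .tb_size = 1000000,
 *         .tc_rate = {
 *                 [0 ... RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE - 1] = 125000000,
 *         },
 *         .tc_period = 10,
 * };
 *
 * static struct rte_sched_subport_params subport_params = {
 *         .n_pipes_per_subport_enabled = 4096,
 *         .qsize = {64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64},
 *         .pipe_profiles = &pipe_profile,  // table holding a single profile
 *         .n_pipe_profiles = 1,
 *         .n_max_pipe_profiles = 1,
 *         .cman_params = NULL,             // congestion management disabled
 * };
 * @endcode
 */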

/** Subport statistics */
struct rte_sched_subport_stats {
        /** Number of packets successfully written for each traffic class */
        uint64_t n_pkts_tc[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

        /** Number of packets dropped for each traffic class */
        uint64_t n_pkts_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

        /** Number of bytes successfully written for each traffic class */
        uint64_t n_bytes_tc[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

        /** Number of bytes dropped for each traffic class */
        uint64_t n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];

        /** Number of packets dropped by the congestion management scheme,
         * for each traffic class
         */
        uint64_t n_pkts_cman_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
};

/** Queue statistics */
struct rte_sched_queue_stats {
        /** Packets successfully written */
        uint64_t n_pkts;

        /** Packets dropped */
        uint64_t n_pkts_dropped;

        /** Packets dropped by the congestion management scheme */
        uint64_t n_pkts_cman_dropped;

        /** Bytes successfully written */
        uint64_t n_bytes;

        /** Bytes dropped */
        uint64_t n_bytes_dropped;
};

/** Port configuration parameters. */
struct rte_sched_port_params {
        /** Name of the port to be associated */
        const char *name;

        /** CPU socket ID */
        int socket;

        /** Output port rate (measured in bytes per second) */
        uint64_t rate;

        /** Maximum Ethernet frame size (measured in bytes).
         * Should not include the framing overhead.
         */
        uint32_t mtu;

        /** Framing overhead per packet (measured in bytes) */
        uint32_t frame_overhead;

        /** Number of subports */
        uint32_t n_subports_per_port;

        /** Subport profile table.
         * Every subport is configured using one of the profiles from this table.
         */
        struct rte_sched_subport_profile_params *subport_profiles;

        /** Number of profiles in the subport profile table */
        uint32_t n_subport_profiles;

        /** Max allowed profiles in the subport profile table */
        uint32_t n_max_subport_profiles;

        /** Maximum number of subport pipes.
         * This parameter is used to reserve a fixed number of bits
         * in struct rte_mbuf::sched.queue_id for the pipe_id for all
         * the subports of the same port.
         */
        uint32_t n_pipes_per_subport;
};

/*
 * Configuration
 *
 ***/

/**
 * Hierarchical scheduler port configuration
 *
 * @param params
 *   Port scheduler configuration parameter structure
 * @return
 *   Handle to port scheduler instance upon success or NULL otherwise.
 */
struct rte_sched_port *
rte_sched_port_config(struct rte_sched_port_params *params);
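
/*
 * Example: a hedged sketch of creating a port, reusing the illustrative
 * subport profile above. Error handling is reduced to a NULL check;
 * rte_exit() comes from the EAL.
 *
 * @code{.c}
 * static struct rte_sched_port_params port_params = {
 *         .name = "sched_port_0",
 *         .socket = 0,
 *         .rate = 1250000000,      // ~10 Gbps expressed in bytes/s
 *         .mtu = 1522,
 *         .frame_overhead = RTE_SCHED_FRAME_OVERHEAD_DEFAULT,
 *         .n_subports_per_port = 1,
 *         .subport_profiles = &subport_profile,
 *         .n_subport_profiles = 1,
 *         .n_max_subport_profiles = 1,
 *         .n_pipes_per_subport = 4096,
 * };
 *
 * struct rte_sched_port *port = rte_sched_port_config(&port_params);
 * if (port == NULL)
 *         rte_exit(EXIT_FAILURE, "Scheduler port config failed\n");
 * @endcode
 */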

/**
 * Hierarchical scheduler port free
 *
 * @param port
 *   Handle to port scheduler instance.
 *   If port is NULL, no operation is performed.
 */
void
rte_sched_port_free(struct rte_sched_port *port);

/**
 * Hierarchical scheduler pipe profile add
 *
 * @param port
 *   Handle to port scheduler instance
 * @param subport_id
 *   Subport ID
 * @param params
 *   Pipe profile parameters
 * @param pipe_profile_id
 *   Set to valid profile ID when the profile is added successfully.
 * @return
 *   0 upon success, error code otherwise
 */
int
rte_sched_subport_pipe_profile_add(struct rte_sched_port *port,
        uint32_t subport_id,
        struct rte_sched_pipe_params *params,
        uint32_t *pipe_profile_id);

/**
 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice.
 *
 * Hierarchical scheduler subport bandwidth profile add.
 * Note that this function is safe to use at runtime for adding a new
 * subport bandwidth profile, as it does not have any impact on the
 * hierarchical structure of the scheduler.
 *
 * @param port
 *   Handle to port scheduler instance
 * @param profile
 *   Subport bandwidth profile
 * @param subport_profile_id
 *   Set to valid profile ID when the profile is added successfully.
 * @return
 *   0 upon success, error code otherwise
 */
__rte_experimental
int
rte_sched_port_subport_profile_add(struct rte_sched_port *port,
        struct rte_sched_subport_profile_params *profile,
        uint32_t *subport_profile_id);

/**
 * Hierarchical scheduler subport configuration.
 * Note that this function is safe to use at runtime
 * to configure the subport bandwidth profile.
 *
 * @param port
 *   Handle to port scheduler instance
 * @param subport_id
 *   Subport ID
 * @param params
 *   Subport configuration parameters. Must be non-NULL
 *   for the first invocation (i.e. initialization) for a given
 *   subport. Ignored (recommended value is NULL) for all
 *   subsequent invocations on the same subport.
 * @param subport_profile_id
 *   ID of subport bandwidth profile
 * @return
 *   0 upon success, error code otherwise
 */
int
rte_sched_subport_config(struct rte_sched_port *port,
        uint32_t subport_id,
        struct rte_sched_subport_params *params,
        uint32_t subport_profile_id);

/**
 * Hierarchical scheduler pipe configuration
 *
 * @param port
 *   Handle to port scheduler instance
 * @param subport_id
 *   Subport ID
 * @param pipe_id
 *   Pipe ID within subport
 * @param pipe_profile
 *   ID of subport-level pre-configured pipe profile
 * @return
 *   0 upon success, error code otherwise
 */
int
rte_sched_pipe_config(struct rte_sched_port *port,
        uint32_t subport_id,
        uint32_t pipe_id,
        int32_t pipe_profile);
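
/*
 * Example: a hedged sketch of the typical bring-up order once the port
 * exists: configure the subport with its bandwidth profile, then map every
 * enabled pipe to a pipe profile. Identifiers reuse the earlier sketches.
 *
 * @code{.c}
 * uint32_t pipe_profile_id;
 * int ret;
 *
 * ret = rte_sched_subport_config(port, 0, &subport_params, 0);
 * if (ret != 0)
 *         rte_exit(EXIT_FAILURE, "Subport config failed: %d\n", ret);
 *
 * ret = rte_sched_subport_pipe_profile_add(port, 0, &pipe_profile,
 *         &pipe_profile_id);
 * if (ret != 0)
 *         rte_exit(EXIT_FAILURE, "Pipe profile add failed: %d\n", ret);
 *
 * for (uint32_t pipe_id = 0;
 *                 pipe_id < subport_params.n_pipes_per_subport_enabled;
 *                 pipe_id++) {
 *         ret = rte_sched_pipe_config(port, 0, pipe_id,
 *                 (int32_t)pipe_profile_id);
 *         if (ret != 0)
 *                 rte_exit(EXIT_FAILURE, "Pipe %u config failed\n", pipe_id);
 * }
 * @endcode
 */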

/**
 * Hierarchical scheduler memory footprint size per port
 *
 * @param port_params
 *   Port scheduler configuration parameter structure
 * @param subport_params
 *   Array of subport parameter structures
 * @return
 *   Memory footprint size in bytes upon success, 0 otherwise
 */
uint32_t
rte_sched_port_get_memory_footprint(struct rte_sched_port_params *port_params,
        struct rte_sched_subport_params **subport_params);

/*
 * Statistics
 *
 ***/

/**
 * Hierarchical scheduler subport statistics read
 *
 * @param port
 *   Handle to port scheduler instance
 * @param subport_id
 *   Subport ID
 * @param stats
 *   Pointer to pre-allocated subport statistics structure where the statistics
 *   counters should be stored
 * @param tc_ov
 *   Pointer to pre-allocated RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE-entry array
 *   where the oversubscription status for each of the subport traffic classes
 *   should be stored.
 * @return
 *   0 upon success, error code otherwise
 */
int
rte_sched_subport_read_stats(struct rte_sched_port *port,
        uint32_t subport_id,
        struct rte_sched_subport_stats *stats,
        uint32_t *tc_ov);

/**
 * Hierarchical scheduler queue statistics read
 *
 * @param port
 *   Handle to port scheduler instance
 * @param queue_id
 *   Queue ID within port scheduler
 * @param stats
 *   Pointer to pre-allocated queue statistics structure where the statistics
 *   counters should be stored
 * @param qlen
 *   Pointer to pre-allocated variable where the current queue length
 *   should be stored.
 * @return
 *   0 upon success, error code otherwise
 */
int
rte_sched_queue_read_stats(struct rte_sched_port *port,
        uint32_t queue_id,
        struct rte_sched_queue_stats *stats,
        uint16_t *qlen);
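
/*
 * Example: a minimal sketch of polling both counter sets. The subport and
 * queue IDs are illustrative; the caller owns all output storage.
 *
 * @code{.c}
 * struct rte_sched_subport_stats subport_stats;
 * uint32_t tc_ov[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
 * struct rte_sched_queue_stats queue_stats;
 * uint16_t qlen;
 *
 * if (rte_sched_subport_read_stats(port, 0, &subport_stats, tc_ov) == 0)
 *         printf("TC0: %" PRIu64 " pkts, %" PRIu64 " dropped\n",
 *                 subport_stats.n_pkts_tc[0],
 *                 subport_stats.n_pkts_tc_dropped[0]);
 *
 * if (rte_sched_queue_read_stats(port, 0, &queue_stats, &qlen) == 0)
 *         printf("queue 0: %" PRIu64 " pkts, qlen %u\n",
 *                 queue_stats.n_pkts, qlen);
 * @endcode
 */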

/**
 * Scheduler hierarchy path write to packet descriptor. Typically
 * called by the packet classification stage.
 *
 * @param port
 *   Handle to port scheduler instance
 * @param pkt
 *   Packet descriptor handle
 * @param subport
 *   Subport ID
 * @param pipe
 *   Pipe ID within subport
 * @param traffic_class
 *   Traffic class ID within pipe (0 .. RTE_SCHED_TRAFFIC_CLASS_BE)
 * @param queue
 *   Queue ID within pipe traffic class, 0 for high priority TCs, and
 *   0 .. (RTE_SCHED_BE_QUEUES_PER_PIPE - 1) for best-effort TC
 * @param color
 *   Packet color set
 */
void
rte_sched_port_pkt_write(struct rte_sched_port *port,
                         struct rte_mbuf *pkt,
                         uint32_t subport, uint32_t pipe, uint32_t traffic_class,
                         uint32_t queue, enum rte_color color);
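
/*
 * Example: a hedged sketch of a classification stage stamping the hierarchy
 * path before enqueue. How the path is derived from packet fields is
 * application-defined; here it is simply hard-coded for illustration.
 *
 * @code{.c}
 * // Send mbuf to subport 0, pipe 5, best-effort TC, WRR queue 2
 * rte_sched_port_pkt_write(port, pkt, 0, 5,
 *         RTE_SCHED_TRAFFIC_CLASS_BE, 2, RTE_COLOR_GREEN);
 * @endcode
 */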

/**
 * Scheduler hierarchy path read from packet descriptor (struct
 * rte_mbuf). Typically called as part of the hierarchical scheduler
 * enqueue operation. The subport, pipe, traffic class and queue
 * parameters need to be pre-allocated by the caller.
 *
 * @param port
 *   Handle to port scheduler instance
 * @param pkt
 *   Packet descriptor handle
 * @param subport
 *   Subport ID
 * @param pipe
 *   Pipe ID within subport
 * @param traffic_class
 *   Traffic class ID within pipe (0 .. RTE_SCHED_TRAFFIC_CLASS_BE)
 * @param queue
 *   Queue ID within pipe traffic class, 0 for high priority TCs, and
 *   0 .. (RTE_SCHED_BE_QUEUES_PER_PIPE - 1) for best-effort TC
 */
void
rte_sched_port_pkt_read_tree_path(struct rte_sched_port *port,
                                  const struct rte_mbuf *pkt,
                                  uint32_t *subport, uint32_t *pipe,
                                  uint32_t *traffic_class, uint32_t *queue);

/**
 * Scheduler packet color read from packet descriptor (struct rte_mbuf).
 *
 * @param pkt
 *   Packet descriptor handle
 * @return
 *   Packet color
 */
enum rte_color
rte_sched_port_pkt_read_color(const struct rte_mbuf *pkt);

/**
 * Hierarchical scheduler port enqueue. Writes up to n_pkts to port
 * scheduler and returns the number of packets actually written. For
 * each packet, the port scheduler queue to write the packet to is
 * identified by reading the hierarchy path from the packet
 * descriptor; if the queue is full or congested and the packet is not
 * written to the queue, then the packet is automatically dropped
 * without any action required from the caller.
 *
 * @param port
 *   Handle to port scheduler instance
 * @param pkts
 *   Array storing the packet descriptor handles
 * @param n_pkts
 *   Number of packets to enqueue from the pkts array into the port scheduler
 * @return
 *   Number of packets successfully enqueued
 */
int
rte_sched_port_enqueue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts);

/**
 * Hierarchical scheduler port dequeue. Reads up to n_pkts from the
 * port scheduler, stores them in the pkts array, and returns the
 * number of packets actually read. The pkts array needs to be
 * pre-allocated by the caller with at least n_pkts entries.
 *
 * @param port
 *   Handle to port scheduler instance
 * @param pkts
 *   Pre-allocated packet descriptor array where the packets dequeued
 *   from the port scheduler should be stored
 * @param n_pkts
 *   Number of packets to dequeue from the port scheduler
 * @return
 *   Number of packets successfully dequeued and placed in the pkts array
 */
int
rte_sched_port_dequeue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts);
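
/*
 * Example: a hedged sketch of the classic run-to-completion loop on the
 * scheduler lcore. rx_burst() and tx_burst() stand in for the surrounding
 * I/O stages and are not part of this API.
 *
 * @code{.c}
 * struct rte_mbuf *pkts[64];
 * uint32_t n;
 *
 * for (;;) {
 *         // classification stage has already stamped the hierarchy path
 *         n = rx_burst(pkts, 64);
 *         if (n > 0)
 *                 rte_sched_port_enqueue(port, pkts, n);
 *
 *         // packets not written to a queue were dropped by the scheduler
 *         n = rte_sched_port_dequeue(port, pkts, 64);
 *         if (n > 0)
 *                 tx_burst(pkts, n);
 * }
 * @endcode
 */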

/**
 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice.
 *
 * Hierarchical scheduler subport traffic class
 * oversubscription enable/disable.
 * This function should be called at the time of subport initialization.
 *
 * @param port
 *   Handle to port scheduler instance
 * @param subport_id
 *   Subport ID
 * @param tc_ov_enable
 *   Boolean flag to enable/disable TC OV
 * @return
 *   0 upon success, error code otherwise
 */
__rte_experimental
int
rte_sched_subport_tc_ov_config(struct rte_sched_port *port, uint32_t subport_id, bool tc_ov_enable);

#ifdef __cplusplus
}
#endif

#endif /* __INCLUDE_RTE_SCHED_H__ */